diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f36b31e..3dc2e80 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -291,6 +291,14 @@ jobs: . -> target - name: Start RustFS + # Pinned to 1.0.0-beta.3 (2026-05-14) — the last known-good tag. + # `rustfs/rustfs:latest` (1.0.0-beta.4, 2026-05-21) added a + # credentials-policy check that refuses to start when + # AWS_ACCESS_KEY_ID/SECRET_ACCESS_KEY are values it considers + # "default" (rustfsadmin/rustfsadmin in our case). Bumping to + # beta.4+ requires either rotating those creds to less-default + # values or setting RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true + # — deliberate work, not an emergency. Pin first; upgrade later. run: | docker rm -f rustfs >/dev/null 2>&1 || true docker run -d \ @@ -299,7 +307,7 @@ jobs: -p 9001:9001 \ -e RUSTFS_ACCESS_KEY="${AWS_ACCESS_KEY_ID}" \ -e RUSTFS_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}" \ - rustfs/rustfs:latest \ + rustfs/rustfs:1.0.0-beta.3 \ /data - name: Install AWS CLI diff --git a/.github/workflows/publish-crates.yml b/.github/workflows/publish-crates.yml index d7f783f..9484b98 100644 --- a/.github/workflows/publish-crates.yml +++ b/.github/workflows/publish-crates.yml @@ -80,8 +80,15 @@ jobs: version=$(cargo metadata --format-version=1 --no-deps \ | jq -r --arg c "$crate" '.packages[] | select(.name==$c) | .version') + # crates.io API requires a User-Agent header — without it the + # API responds 403 and the skip check below would silently + # fall through to a real publish attempt that errors with + # "already exists on crates.io index" when re-running after a + # partial publish. Send a UA naming the workflow. 
local current - current=$(curl -fsSL "https://crates.io/api/v1/crates/${crate}" \ + current=$(curl -fsSL \ + -A 'ModernRelay-omnigraph-ci (https://github.com/ModernRelay/omnigraph)' \ + "https://crates.io/api/v1/crates/${crate}" \ | jq -r '.crate.max_version' || echo "") if [[ "$current" == "$version" ]]; then @@ -90,10 +97,28 @@ jobs: fi echo "==> publishing ${crate} ${version} (current crates.io: ${current:-none})" - cargo publish -p "$crate" --locked + # Defense in depth: if the skip check missed an existing + # version (e.g. crates.io API hiccup), cargo publish errors + # with "already exists on crates.io index". Treat that as + # success so the workflow can be re-run idempotently. + local output + if ! output=$(cargo publish -p "$crate" --locked 2>&1); then + echo "$output" + if echo "$output" | grep -q "already exists on crates.io"; then + echo "==> ${crate} ${version} was already published; treating as success" + return 0 + fi + return 1 + fi + echo "$output" } + # Order matters: each crate must precede anything that depends on it. + # omnigraph-compiler and omnigraph-policy have no internal deps; + # omnigraph-engine depends on both; server depends on engine + the + # two leaf crates; cli depends on everything. publish_if_new omnigraph-compiler + publish_if_new omnigraph-policy publish_if_new omnigraph-engine publish_if_new omnigraph-server publish_if_new omnigraph-cli diff --git a/.gitignore b/.gitignore index 919d9d8..2248d5a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ __pycache__/ *.pyc demo/*.omni/ .omnigraph-rustfs-demo/ +/docs/internal # Local-only working files (not for the public repo) .claude/ diff --git a/AGENTS.md b/AGENTS.md index d1c1bde..a9cc9c0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,6 +1,6 @@ # OmniGraph — Agent Guide -This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this repo. 
It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer. +This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this codebase. It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer. **Required reading every session, every change:** @@ -16,9 +16,9 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th `CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`. -**Version surveyed:** 0.4.2 +**Version surveyed:** 0.6.0 **Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-cli`, `omnigraph-server` -**Storage substrate:** Lance 4.x (columnar, versioned, branchable) +**Storage substrate:** Lance 6.x (columnar, versioned, branchable) **License:** MIT **Toolchain:** Rust stable, edition 2024 @@ -50,10 +50,10 @@ CLI (omnigraph) HTTP Server (omnigraph-server, Axum) omnigraph-compiler ── Pest grammars, catalog, IR, lowering, lint, migration plan │ ▼ - omnigraph (engine) ── ManifestRepo, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec + omnigraph (engine) ── ManifestCoordinator, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec │ ▼ - Lance 4.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes + Lance 6.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes │ ▼ Object store (file / s3 / RustFS / MinIO / S3-compat) @@ -167,35 +167,35 @@ If a proposal fits one of these, the burden is on the proposer to justify why th ## Quick-reference flows ```bash -# Initialize an S3-backed repo -omnigraph init --schema ./schema.pg s3://my-bucket/repo.omni +# Initialize an 
S3-backed graph +omnigraph init --schema ./schema.pg s3://my-bucket/graph.omni # Bulk load -omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/repo.omni +omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/graph.omni # Branch + ingest a review batch -omnigraph branch create --from main review/2026-04-25 s3://my-bucket/repo.omni -omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/repo.omni +omnigraph branch create --from main review/2026-04-25 s3://my-bucket/graph.omni +omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/graph.omni # Run a hybrid (vector + BM25) query omnigraph read --query ./queries.gq --name find_similar \ - --params '{"q":"trends in AI safety"}' --format table s3://my-bucket/repo.omni + --params '{"q":"trends in AI safety"}' --format table s3://my-bucket/graph.omni # Plan + apply schema migration -omnigraph schema plan --schema ./next.pg s3://my-bucket/repo.omni -omnigraph schema apply --schema ./next.pg s3://my-bucket/repo.omni --json +omnigraph schema plan --schema ./next.pg s3://my-bucket/graph.omni +omnigraph schema apply --schema ./next.pg s3://my-bucket/graph.omni --json # Merge review branch back -omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/repo.omni +omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/graph.omni # Compact + GC (preview, then confirm) -omnigraph optimize s3://my-bucket/repo.omni -omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/repo.omni -omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/repo.omni +omnigraph optimize s3://my-bucket/graph.omni +omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/graph.omni +omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/graph.omni # Stand up the HTTP server (token from env) OMNIGRAPH_SERVER_BEARER_TOKEN=xxxx \ - omnigraph-server s3://my-bucket/repo.omni --bind 0.0.0.0:8080 + omnigraph-server 
s3://my-bucket/graph.omni --bind 0.0.0.0:8080 # Cedar policy explain omnigraph policy explain --actor act-alice --action change --branch main @@ -222,7 +222,7 @@ omnigraph policy explain --actor act-alice --action change --branch main | Schema language | — | `.pg` + Pest grammar + catalog + interfaces + constraints + annotations | | Query language | — | `.gq` + Pest grammar + IR + lowering + linter | | Schema migration planning | — | `plan_schema_migration` + `apply_schema` step types + `__schema_apply_lock__` | -| Commit graph (DAG) across whole repo | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map | +| Commit graph (DAG) across whole graph | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map | | Per-query atomic writes | — | In-memory `MutationStaging.pending` accumulator + `stage_*` / `commit_staged` per touched table at end-of-query + publisher CAS via `commit_with_expected` (single manifest commit per `mutate_as` / `load`); D₂ parse-time rule keeps inserts/updates and deletes from mixing | | Three-way row-level merge | — | `OrderedTableCursor` + `StagedTableWriter`, structured `MergeConflictKind` | | Change feeds | — | `diff_between` / `diff_commits` with manifest fast path + ID streaming | diff --git a/Cargo.lock b/Cargo.lock index 0e3aac4..f93315e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,9 +175,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", @@ -210,9 +210,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" dependencies = [ "ahash", "arrow-buffer", @@ -221,7 +221,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "num-complex", "num-integer", "num-traits", @@ -229,9 +229,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" dependencies = [ "bytes", "half", @@ -241,9 +241,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", @@ -263,9 +263,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de" dependencies = [ "arrow-array", "arrow-cast", @@ -278,9 +278,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" dependencies = [ "arrow-array", "arrow-buffer", @@ -301,21 +301,22 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex 0.12.1", + "lz4_flex", "zstd", ] [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", - "arrow-data", + "arrow-ord", "arrow-schema", + "arrow-select", "chrono", "half", "indexmap 2.13.0", @@ -331,9 +332,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -344,9 +345,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -357,9 
+358,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" dependencies = [ "bitflags", "serde_core", @@ -368,9 +369,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash", "arrow-array", @@ -382,9 +383,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -464,7 +465,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -475,7 +476,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -958,7 +959,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1105,31 +1106,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bon" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" -dependencies = [ - "bon-macros", - "rustversion", -] - 
-[[package]] -name = "bon-macros" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" -dependencies = [ - "darling", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.115", -] - [[package]] name = "borsh" version = "1.6.1" @@ -1290,12 +1266,6 @@ dependencies = [ "smol_str", ] -[[package]] -name = "census" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" - [[package]] name = "cfg-if" version = "1.0.4" @@ -1310,9 +1280,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -1373,7 +1343,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1645,7 +1615,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1656,7 +1626,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1675,9 +1645,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" +checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" dependencies = [ "arrow", "arrow-schema", @@ -1711,7 +1681,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - 
"object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.2", "regex", @@ -1724,9 +1694,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" +checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" dependencies = [ "arrow", "async-trait", @@ -1742,16 +1712,16 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" +checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" dependencies = [ "arrow", "async-trait", @@ -1767,14 +1737,14 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", ] [[package]] name = "datafusion-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" +checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" dependencies = [ "ahash", "arrow", @@ -1783,9 +1753,10 @@ dependencies = [ "half", "hashbrown 0.16.1", "indexmap 2.13.0", + "itertools 0.14.0", "libc", "log", - "object_store", + "object_store 0.13.2", "paste", "sqlparser", "tokio", @@ -1794,9 +1765,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" +checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" dependencies = [ "futures", 
"log", @@ -1805,9 +1776,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" +checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" dependencies = [ "arrow", "async-trait", @@ -1826,7 +1797,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "rand 0.9.2", "tokio", "url", @@ -1834,9 +1805,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" +checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" dependencies = [ "arrow", "arrow-ipc", @@ -1852,15 +1823,15 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" +checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" dependencies = [ "arrow", "async-trait", @@ -1874,16 +1845,16 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", "regex", "tokio", ] [[package]] name = "datafusion-datasource-json" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" +checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" dependencies = [ "arrow", "async-trait", @@ -1897,31 +1868,35 @@ dependencies = [ 
"datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-doc" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" +checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" [[package]] name = "datafusion-execution" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" +checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.2", "tempfile", @@ -1930,9 +1905,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" +checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" dependencies = [ "arrow", "async-trait", @@ -1952,9 +1927,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" +checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", @@ -1965,9 +1940,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" +checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" dependencies = [ "arrow", "arrow-buffer", @@ -1986,6 +1961,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -1996,9 +1972,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" +checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" dependencies = [ "ahash", "arrow", @@ -2012,14 +1988,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" +checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" dependencies = [ "ahash", "arrow", @@ -2030,9 +2007,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" +checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" dependencies = [ "arrow", "arrow-ord", @@ -2046,16 +2023,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" +checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" dependencies = [ "arrow", "async-trait", @@ -2069,9 +2048,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" +checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" dependencies = [ "arrow", "datafusion-common", @@ -2087,9 +2066,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" +checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2097,20 +2076,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" +checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" +checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" dependencies = [ "arrow", "chrono", @@ -2127,9 +2106,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" +checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" dependencies = [ "ahash", "arrow", @@ -2150,9 +2129,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" +checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" dependencies = [ "arrow", "datafusion-common", @@ -2165,9 +2144,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" +checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" dependencies = [ "ahash", "arrow", @@ -2182,9 +2161,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" +checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" dependencies = [ "arrow", "datafusion-common", @@ -2200,9 +2179,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" +checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" dependencies = [ "ahash", "arrow", @@ -2224,6 +2203,7 @@ dependencies = [ "indexmap 2.13.0", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2231,9 +2211,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = 
"52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" +checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" dependencies = [ "arrow", "datafusion-common", @@ -2248,9 +2228,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" +checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" dependencies = [ "async-trait", "datafusion-common", @@ -2262,15 +2242,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" +checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", "log", "regex", @@ -2365,7 +2346,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2377,12 +2358,6 @@ dependencies = [ "const-random", ] -[[package]] -name = "downcast-rs" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" - [[package]] name = "dunce" version = "1.0.5" @@ -2404,7 +2379,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2448,7 +2423,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] 
[[package]] @@ -2521,12 +2496,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" -[[package]] -name = "fastdivide" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" - [[package]] name = "fastrand" version = "2.3.0" @@ -2601,16 +2570,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs4" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" -dependencies = [ - "rustix 0.38.44", - "windows-sys 0.52.0", -] - [[package]] name = "fs_extra" version = "1.3.0" @@ -2619,9 +2578,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2195cc7f87e84bd695586137de99605e7e9579b26ec5e01b82960ddb4d0922f2" +checksum = "83cf860f6a6bf0a6a60fdfe5a36c75121fad5ea4332d1d12deee3e65b6047727" dependencies = [ "arrow-array", "rand 0.9.2", @@ -2698,7 +2657,7 @@ checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2887,8 +2846,6 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", "foldhash 0.1.5", ] @@ -2903,6 +2860,12 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" @@ 
-2939,12 +2902,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "htmlescape" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" - [[package]] name = "http" version = "0.2.12" @@ -3307,6 +3264,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "io-uring" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d09b98f7eace8982db770e4408e7470b028ce513ac28fecdc6bf4c30fe92b62" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -3376,7 +3344,7 @@ checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -3425,7 +3393,7 @@ dependencies = [ "fast-float2", "itoa", "jiff", - "nom 8.0.0", + "nom", "num-traits", "ordered-float", "rand 0.9.2", @@ -3492,14 +3460,15 @@ dependencies = [ [[package]] name = "lance" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efe6c3ddd79cdfd2b7e1c23cafae52806906bc40fbd97de9e8cf2f8c7a75fc04" +checksum = "d34e854994e84d043897f5ec9fb609221e9e69e3fd52996cd715d979fcd349f6" dependencies = [ "arrow", "arrow-arith", "arrow-array", "arrow-buffer", + "arrow-cast", "arrow-ipc", "arrow-ord", "arrow-row", @@ -3535,12 +3504,14 @@ dependencies = [ "lance-linalg", "lance-namespace", "lance-table", + "lance-tokenizer", "log", "moka", - "object_store", + "object_store 0.12.5", "permutation", "pin-project", "prost", + "prost-build", "prost-types", "rand 0.9.2", "roaring", @@ -3548,7 +3519,6 @@ dependencies = [ "serde", "serde_json", "snafu", - "tantivy", "tokio", "tokio-stream", "tokio-util", @@ -3559,14 +3529,15 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "4.0.0" +version = "6.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d9f5d95bdda2a2b790f1fb8028b5b6dcf661abeb3133a8bca0f3d24b054af87" +checksum = "7827fe404358c27d120ee8ea8ef7b9415c2911d54072bec83dd689d750ae65da" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "arrow-select", @@ -3581,9 +3552,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f827d6ab9f8f337a9509d5ad66a12f3314db8713868260521c344ef6135eb4e4" +checksum = "2cd0b31570d50fe13c7e4e36b03e1f1c99c3d8e5a34845b24b0665b51b40570d" dependencies = [ "arrayref", "paste", @@ -3592,9 +3563,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1e25df6a79bf72ee6bcde0851f19b1cd36c5848c1b7db83340882d3c9fdecb" +checksum = "b128c213c676cb8e03c62a68670642770825171e64097cc2da97cbb19fe35d29" dependencies = [ "arrow-array", "arrow-buffer", @@ -3614,7 +3585,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.12.5", "pin-project", "prost", "rand 0.9.2", @@ -3631,13 +3602,14 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93146de8ae720cb90edef81c2f2d0a1b065fc2f23ecff2419546f389b0fa70a4" +checksum = "e03b2de71cbcd09b10bf1a17c83cacbc0176ecd97203fb72b9e59d9b8f9a3743" dependencies = [ "arrow", "arrow-array", "arrow-buffer", + "arrow-cast", "arrow-ord", "arrow-schema", "arrow-select", @@ -3663,9 +3635,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccec8ce4d8e0a87a99c431dab2364398029f2ffb649c1a693c60c79e05ed30dd" +checksum = 
"2fe7c7ea7fd397e495a1646fec360e46ee0cbd75718f1c0e887aad657c5f2944" dependencies = [ "arrow", "arrow-array", @@ -3676,16 +3648,16 @@ dependencies = [ "half", "hex", "rand 0.9.2", - "rand_distr 0.5.1", + "rand_distr", "rand_xoshiro", "random_word", ] [[package]] name = "lance-encoding" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1aec0bbbac6bce829bc10f1ba066258126100596c375fb71908ecf11c2c2a5" +checksum = "fe3f8070835b407d8db9ea8728386bc3207ba23c66a9c22d344e231ef12b77ca" dependencies = [ "arrow-arith", "arrow-array", @@ -3722,9 +3694,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14a8c548804f5b17486dc2d3282356ed1957095a852780283bc401fdd69e9075" +checksum = "a6dfcf654549330df3aef708cd7c12e170feecddd34d6c19dd005b4153213268" dependencies = [ "arrow-arith", "arrow-array", @@ -3745,7 +3717,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -3756,9 +3728,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da212f0090ea59f79ac3686660f596520c167fe1cb5f408900cf71d215f0e03" +checksum = "4fb8ad0bd10efa2608634a2518b7dd501231e76c56a65fbd6519e23914cc425a" dependencies = [ "arrow", "arrow-arith", @@ -3795,16 +3767,17 @@ dependencies = [ "lance-io", "lance-linalg", "lance-table", + "lance-tokenizer", "libm", "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", "rand 0.9.2", - "rand_distr 0.5.1", + "rand_distr", "rangemap", "rayon", "roaring", @@ -3812,7 +3785,6 @@ dependencies = [ "serde_json", "smallvec", "snafu", - "tantivy", "tempfile", "tokio", "tracing", @@ -3822,9 +3794,9 @@ dependencies = [ [[package]] name = "lance-io" 
-version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d958eb4b56f03bbe0f5f85eb2b4e9657882812297b6f711f201ffc995f259f" +checksum = "ef5314703fa8c8baed04193cc669da80ab42521c6319d3cc921a4a997690dcc0" dependencies = [ "arrow", "arrow-arith", @@ -3844,11 +3816,14 @@ dependencies = [ "deepsize", "futures", "http 1.4.0", + "io-uring", "lance-arrow", "lance-core", "lance-namespace", + "libc", "log", - "object_store", + "moka", + "object_store 0.12.5", "object_store_opendal", "opendal", "path_abs", @@ -3865,9 +3840,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0285b70da35def7ed95e150fae1d5308089554e1290470403ed3c50cb235bc5e" +checksum = "51aa9b73279f505b2bec0f194c7a2390ca74ad3260131e631a7bef8d97d54b2e" dependencies = [ "arrow-array", "arrow-buffer", @@ -3883,9 +3858,9 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f78e2a828b654e062a495462c6e3eb4fcf0e7e907d761b8f217fc09ccd3ceac" +checksum = "39cd01581f55ce45c49cbe494ee86c7ba7ca4ca3654690fd820941cd9105a46e" dependencies = [ "arrow", "async-trait", @@ -3898,9 +3873,9 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2392314f3da38f00d166295e44244208a65ccfc256e274fa8631849fc3f4d94" +checksum = "c2cb89f3933060f01350ad05a5a3fbda952e8ba638799bf8ac4cd2368416ee46" dependencies = [ "arrow", "arrow-ipc", @@ -3913,10 +3888,11 @@ dependencies = [ "lance-core", "lance-index", "lance-io", + "lance-linalg", "lance-namespace", "lance-table", "log", - "object_store", + "object_store 0.12.5", "rand 0.9.2", "serde_json", "snafu", @@ -3926,22 +3902,23 @@ dependencies = [ [[package]] name = 
"lance-namespace-reqwest-client" -version = "0.6.1" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" +checksum = "6369eee4682fb11edf538388b43c61ce288b8302fe89bb40944d7daa7faaae99" dependencies = [ "reqwest", "serde", "serde_json", "serde_repr", + "serde_with", "url", ] [[package]] name = "lance-table" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df9c4adca3eb2074b3850432a9fb34248a3d90c3d6427d158b13ff9355664ee" +checksum = "5db70650465a1af174b7dfe6948ec91a3d466ada12e11274eb66e51132173aa0" dependencies = [ "arrow", "arrow-array", @@ -3959,7 +3936,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -3976,6 +3953,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "lance-tokenizer" +version = "6.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb08ef9382c9d58036c323db2c19cc097e02d1d0d87714fc7176b5d3b36a31aa" +dependencies = [ + "rust-stemmers", + "serde", + "unicode-normalization", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -3991,12 +3979,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" -[[package]] -name = "levenshtein_automata" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" - [[package]] name = "lexical-core" version = "1.0.6" @@ -4093,12 +4075,6 @@ dependencies = [ "linked-hash-map", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - [[package]] name = 
"linux-raw-sys" version = "0.12.1" @@ -4146,7 +4122,7 @@ dependencies = [ "quote", "regex-automata", "regex-syntax", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -4171,15 +4147,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "lru" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" -dependencies = [ - "hashbrown 0.15.5", -] - [[package]] name = "lru-slab" version = "0.1.2" @@ -4207,15 +4174,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.6" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" - -[[package]] -name = "lz4_flex" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "7ef0d4ed8669f8f8826eb00dc878084aa8f253506c4fd5e8f58f5bce72ddb97e" dependencies = [ "twox-hash", ] @@ -4258,30 +4219,12 @@ dependencies = [ "digest", ] -[[package]] -name = "measure_time" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" -dependencies = [ - "log", -] - [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" -[[package]] -name = "memmap2" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" -dependencies = [ - "libc", -] - [[package]] name = "miette" version = "7.6.0" @@ -4302,7 +4245,7 @@ checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 
2.0.117", ] [[package]] @@ -4321,12 +4264,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -4380,12 +4317,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "murmurhash32" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" - [[package]] name = "ndarray" version = "0.16.1" @@ -4407,16 +4338,6 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nom" version = "8.0.0" @@ -4578,6 +4499,32 @@ dependencies = [ "web-time", ] +[[package]] +name = "object_store" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "object_store_opendal" version = "0.55.0" @@ -4588,7 +4535,7 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store", + "object_store 0.12.5", "opendal", 
"pin-project", "tokio", @@ -4596,11 +4543,12 @@ dependencies = [ [[package]] name = "omnigraph-cli" -version = "0.4.2" +version = "0.6.0" dependencies = [ "assert_cmd", "clap", "color-eyre", + "lance", "lance-index", "omnigraph-compiler", "omnigraph-engine", @@ -4617,7 +4565,7 @@ dependencies = [ [[package]] name = "omnigraph-compiler" -version = "0.4.2" +version = "0.6.0" dependencies = [ "ahash", "arrow-array", @@ -4638,7 +4586,7 @@ dependencies = [ [[package]] name = "omnigraph-engine" -version = "0.4.2" +version = "0.6.0" dependencies = [ "arc-swap", "arrow-array", @@ -4660,7 +4608,7 @@ dependencies = [ "lance-namespace", "lance-namespace-impls", "lance-table", - "object_store", + "object_store 0.12.5", "omnigraph-compiler", "omnigraph-policy", "regex", @@ -4679,7 +4627,7 @@ dependencies = [ [[package]] name = "omnigraph-policy" -version = "0.4.2" +version = "0.6.0" dependencies = [ "cedar-policy", "clap", @@ -4692,7 +4640,7 @@ dependencies = [ [[package]] name = "omnigraph-server" -version = "0.4.2" +version = "0.6.0" dependencies = [ "async-trait", "aws-config", @@ -4702,6 +4650,7 @@ dependencies = [ "color-eyre", "dashmap", "futures", + "lance", "lance-index", "omnigraph-compiler", "omnigraph-engine", @@ -4733,12 +4682,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" -[[package]] -name = "oneshot" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" - [[package]] name = "opendal" version = "0.55.0" @@ -4806,15 +4749,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "ownedbytes" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" -dependencies = [ - "stable_deref_trait", -] - [[package]] name = "owo-colors" version = "4.2.3" @@ -4939,7 +4873,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5024,7 +4958,7 @@ checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5176,7 +5110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5213,7 +5147,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.115", + "syn 2.0.117", "tempfile", ] @@ -5227,7 +5161,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5410,16 +5344,6 @@ dependencies = [ "getrandom 0.3.4", ] -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand 0.8.5", -] - [[package]] name = "rand_distr" version = "0.5.1" @@ -5521,7 +5445,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5730,19 +5654,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.1.4" @@ -5752,7 +5663,7 @@ dependencies = [ "bitflags", "errno", "libc", - 
"linux-raw-sys 0.12.1", + "linux-raw-sys", "windows-sys 0.61.2", ] @@ -6009,7 +5920,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6045,7 +5956,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6088,7 +5999,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6127,7 +6038,7 @@ checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6227,15 +6138,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" -[[package]] -name = "sketches-ddsketch" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" -dependencies = [ - "serde", -] - [[package]] name = "slab" version = "0.4.12" @@ -6276,7 +6178,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6317,9 +6219,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -6327,13 +6229,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = 
"a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6404,7 +6306,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6426,9 +6328,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.115" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -6452,7 +6354,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6461,152 +6363,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" -[[package]] -name = "tantivy" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" -dependencies = [ - "aho-corasick", - "arc-swap", - "base64", - "bitpacking", - "bon", - "byteorder", - "census", - "crc32fast", - "crossbeam-channel", - "downcast-rs", - "fastdivide", - "fnv", - "fs4", - "htmlescape", - "hyperloglogplus", - "itertools 0.14.0", - "levenshtein_automata", - "log", - "lru", - "lz4_flex 0.11.6", - "measure_time", - "memmap2", - "once_cell", - "oneshot", - "rayon", - "regex", - "rust-stemmers", - "rustc-hash", - "serde", - "serde_json", - "sketches-ddsketch", - "smallvec", - "tantivy-bitpacker", - "tantivy-columnar", - "tantivy-common", - "tantivy-fst", - "tantivy-query-grammar", - "tantivy-stacker", - "tantivy-tokenizer-api", - "tempfile", - "thiserror", - "time", - "uuid", - "winapi", -] - -[[package]] -name = "tantivy-bitpacker" 
-version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" -dependencies = [ - "bitpacking", -] - -[[package]] -name = "tantivy-columnar" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" -dependencies = [ - "downcast-rs", - "fastdivide", - "itertools 0.14.0", - "serde", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-sstable", - "tantivy-stacker", -] - -[[package]] -name = "tantivy-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" -dependencies = [ - "async-trait", - "byteorder", - "ownedbytes", - "serde", - "time", -] - -[[package]] -name = "tantivy-fst" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" -dependencies = [ - "byteorder", - "regex-syntax", - "utf8-ranges", -] - -[[package]] -name = "tantivy-query-grammar" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" -dependencies = [ - "nom 7.1.3", - "serde", - "serde_json", -] - -[[package]] -name = "tantivy-sstable" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" -dependencies = [ - "futures-util", - "itertools 0.14.0", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-fst", - "zstd", -] - -[[package]] -name = "tantivy-stacker" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" -dependencies = [ - "murmurhash32", - 
"rand_distr 0.4.3", - "tantivy-common", -] - -[[package]] -name = "tantivy-tokenizer-api" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" -dependencies = [ - "serde", -] - [[package]] name = "tap" version = "1.0.1" @@ -6622,7 +6378,7 @@ dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", - "rustix 1.1.4", + "rustix", "windows-sys 0.61.2", ] @@ -6658,7 +6414,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6769,7 +6525,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6801,6 +6557,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -6888,7 +6645,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7113,7 +6870,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7244,7 +7001,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -7333,22 +7090,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" 
version = "0.1.11" @@ -7358,12 +7099,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.62.2" @@ -7385,7 +7120,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7396,7 +7131,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7618,7 +7353,7 @@ dependencies = [ "heck", "indexmap 2.13.0", "prettyplease", - "syn 2.0.115", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -7634,7 +7369,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -7722,7 +7457,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "synstructure", ] @@ -7743,7 +7478,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7763,7 +7498,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "synstructure", ] @@ -7803,7 +7538,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c3141d2..66bfc01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,29 +14,29 @@ default-members = [ ] [workspace.dependencies] 
-arrow-array = "57" -arrow-ipc = "57" -arrow-schema = "57" -arrow-select = "57" -arrow-cast = { version = "57", features = ["prettyprint"] } -arrow-ord = "57" +arrow-array = "58" +arrow-ipc = "58" +arrow-schema = "58" +arrow-select = "58" +arrow-cast = { version = "58", features = ["prettyprint"] } +arrow-ord = "58" -datafusion = { version = "52", default-features = false } -datafusion-physical-plan = "52" -datafusion-physical-expr = "52" -datafusion-execution = "52" -datafusion-common = "52" -datafusion-expr = "52" -datafusion-functions-aggregate = "52" +datafusion = { version = "53", default-features = false, features = ["nested_expressions"] } +datafusion-physical-plan = "53" +datafusion-physical-expr = "53" +datafusion-execution = "53" +datafusion-common = "53" +datafusion-expr = "53" +datafusion-functions-aggregate = "53" -lance = { version = "4.0.0", default-features = false, features = ["aws"] } -lance-datafusion = "4.0.0" -lance-file = "4.0.0" -lance-index = "4.0.0" -lance-linalg = "4.0.0" -lance-namespace = "4.0.0" -lance-namespace-impls = "4.0.0" -lance-table = "4.0.0" +lance = { version = "6.0.1", default-features = false, features = ["aws"] } +lance-datafusion = "6.0.1" +lance-file = "6.0.1" +lance-index = "6.0.1" +lance-linalg = "6.0.1" +lance-namespace = "6.0.1" +lance-namespace-impls = "6.0.1" +lance-table = "6.0.1" ulid = "1" futures = "0.3" diff --git a/README.md b/README.md index bf884af..49af70f 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/ That bootstrap: - starts RustFS on `127.0.0.1:9000` -- creates a bucket and S3-backed repo +- creates a bucket and S3-backed graph - loads the checked-in context fixture - launches `omnigraph-server` on `127.0.0.1:8080` @@ -69,8 +69,8 @@ Docker must be installed and running first. The RustFS bootstrap prefers the rolling `edge` binaries and only falls back to source builds when release assets are unavailable. 
-If a previous run left objects under the same repo prefix but did not finish -initializing the repo, rerun with `RESET_REPO=1` or set `PREFIX` to a new +If a previous run left objects under the same graph prefix but did not finish +initializing the graph, rerun with `RESET_REPO=1` or set `PREFIX` to a new value. ## Common Commands @@ -78,12 +78,12 @@ value. The same URI works for local paths, `s3://…`, or `http://host:port`. ```bash -omnigraph init --schema ./schema.pg ./repo.omni -omnigraph load --data ./data.jsonl ./repo.omni -omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./repo.omni -omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./repo.omni -omnigraph branch create --from main feature-x ./repo.omni -omnigraph branch merge feature-x --into main ./repo.omni +omnigraph init --schema ./schema.pg ./graph.omni +omnigraph load --data ./data.jsonl ./graph.omni +omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./graph.omni +omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./graph.omni +omnigraph branch create --from main feature-x ./graph.omni +omnigraph branch merge feature-x --into main ./graph.omni ``` See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, ingest, runs, and policy commands. diff --git a/crates/omnigraph-cli/Cargo.toml b/crates/omnigraph-cli/Cargo.toml index 6441bd9..0d35ed8 100644 --- a/crates/omnigraph-cli/Cargo.toml +++ b/crates/omnigraph-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-cli" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "CLI for the Omnigraph graph database." 
license = "MIT" @@ -13,10 +13,10 @@ name = "omnigraph" path = "src/main.rs" [dependencies] -omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" } -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } -omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" } -omnigraph-server = { path = "../omnigraph-server", version = "0.4.2" } +omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" } +omnigraph-server = { path = "../omnigraph-server", version = "0.6.0" } clap = { workspace = true } color-eyre = { workspace = true } serde = { workspace = true } @@ -30,4 +30,5 @@ assert_cmd = "2" predicates = "3" serde_json = { workspace = true } tempfile = { workspace = true } +lance = { workspace = true } lance-index = { workspace = true } diff --git a/crates/omnigraph-cli/src/main.rs b/crates/omnigraph-cli/src/main.rs index a7c762b..6fadbed 100644 --- a/crates/omnigraph-cli/src/main.rs +++ b/crates/omnigraph-cli/src/main.rs @@ -67,16 +67,16 @@ enum Command { Version, /// Generate, clean, or refresh explicit seed embeddings Embed(EmbedArgs), - /// Initialize a new repo from a schema + /// Initialize a new graph from a schema Init { #[arg(long)] schema: PathBuf, - /// Repo URI (local path or s3://) + /// Graph URI (local path or s3://) uri: String, }, - /// Load data into a repo + /// Load data into a graph Load { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -93,7 +93,7 @@ enum Command { }, /// Ingest data into a reviewable named branch Ingest { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -127,7 +127,7 @@ enum Command { /// printed and the invocation is rewritten to `omnigraph lint`). 
#[command(visible_alias = "check")] Lint { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -140,9 +140,9 @@ enum Command { #[arg(long)] json: bool, }, - /// Show repo snapshot + /// Show graph snapshot Snapshot { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -155,7 +155,7 @@ enum Command { }, /// Export a full graph snapshot as JSONL Export { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -182,7 +182,7 @@ enum Command { /// when used. Pairs with `omnigraph mutate` on the write side. #[command(visible_alias = "read")] Query { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(hide = true)] @@ -220,7 +220,7 @@ enum Command { /// warning when used. Pairs with `omnigraph query` on the read side. #[command(visible_alias = "change")] Mutate { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(hide = true)] @@ -252,9 +252,9 @@ enum Command { #[command(subcommand)] command: PolicyCommand, }, - /// Compact small Lance fragments in every table of the repo + /// Compact small Lance fragments in every table of the graph Optimize { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -263,9 +263,9 @@ enum Command { #[arg(long)] json: bool, }, - /// Remove old Lance versions from every table of the repo (destructive) + /// Remove old Lance versions from every table of the graph (destructive) Cleanup { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -291,7 +291,7 @@ enum Command { enum BranchCommand { /// Create a new branch Create { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -306,7 +306,7 @@ enum BranchCommand { }, /// List branches List { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -318,7 +318,7 @@ enum BranchCommand { }, /// Delete a branch Delete { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -331,7 +331,7 @@ enum BranchCommand { 
}, /// Merge a source branch into a target branch Merge { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -350,7 +350,7 @@ enum BranchCommand { enum SchemaCommand { /// Plan a schema migration against the accepted persisted schema Plan { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -368,7 +368,7 @@ enum SchemaCommand { }, /// Apply a supported schema migration Apply { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -393,7 +393,7 @@ enum SchemaCommand { /// Show the current accepted schema source #[command(alias = "get")] Show { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -405,10 +405,11 @@ enum SchemaCommand { } #[derive(Debug, Subcommand)] + enum CommitCommand { /// List graph commits List { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -421,7 +422,7 @@ enum CommitCommand { }, /// Show a graph commit Show { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -594,7 +595,7 @@ fn finish_query_lint(output: &QueryLintOutput, json: bool) -> Result<()> { Ok(()) } -fn ensure_local_repo_parent(uri: &str) -> Result<()> { +fn ensure_local_graph_parent(uri: &str) -> Result<()> { if !uri.contains("://") { fs::create_dir_all(uri)?; } @@ -706,10 +707,10 @@ fn resolve_policy_engine(config: &OmnigraphConfig) -> Result { let policy_file = config .resolve_policy_file() .ok_or_else(|| color_eyre::eyre::eyre!("policy.file must be set in omnigraph.yaml"))?; - PolicyEngine::load(&policy_file, &policy_repo_id(config)) + PolicyEngine::load(&policy_file, &policy_graph_id(config)) } -/// Open a local-URI repo and, when `policy.file` is configured in +/// Open a local-URI graph and, when `policy.file` is configured in /// `omnigraph.yaml`, install the resolved `PolicyEngine` on the engine /// handle so every direct-engine write goes through /// `Omnigraph::enforce(...)` (MR-722). 
Without a configured policy this @@ -733,10 +734,7 @@ async fn open_local_db_with_policy(uri: &str, config: &OmnigraphConfig) -> Resul /// policy is configured and this returns `None`, the engine-layer /// footgun guard intentionally denies — silent bypass via "I forgot the /// actor" is what the guard prevents. -fn resolve_cli_actor<'a>( - cli_as: Option<&'a str>, - config: &'a OmnigraphConfig, -) -> Option<&'a str> { +fn resolve_cli_actor<'a>(cli_as: Option<&'a str>, config: &'a OmnigraphConfig) -> Option<&'a str> { cli_as.or(config.cli.actor.as_deref()) } @@ -748,7 +746,7 @@ fn resolve_policy_tests_path(config: &OmnigraphConfig) -> Result { }) } -fn policy_repo_id(config: &OmnigraphConfig) -> String { +fn policy_graph_id(config: &OmnigraphConfig) -> String { if let Some(name) = &config.project.name { return name.clone(); } @@ -846,8 +844,15 @@ fn parse_duration_arg(s: &str) -> Result { if s.is_empty() { bail!("duration is empty"); } - let (num_part, unit) = match s.char_indices().rev().find(|(_, c)| c.is_ascii_alphabetic()) { - Some((i, _)) => (&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], &s[i..]), + let (num_part, unit) = match s + .char_indices() + .rev() + .find(|(_, c)| c.is_ascii_alphabetic()) + { + Some((i, _)) => ( + &s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], + &s[i..], + ), None => (s, ""), }; let n: u64 = num_part @@ -873,7 +878,7 @@ fn resolve_local_uri( let uri = resolve_uri(config, cli_uri, cli_target)?; if is_remote_uri(&uri) { bail!( - "{} is only supported against local repo URIs in this milestone", + "{} is only supported against local graph URIs in this milestone", operation ); } @@ -1138,9 +1143,7 @@ fn render_schema_plan_step(step: &SchemaMigrationStep) -> String { type_name, drop_mode_label(*mode), ), - SchemaMigrationStep::UnsupportedChange { - entity, reason, .. - } => { + SchemaMigrationStep::UnsupportedChange { entity, reason, .. 
} => { // When a schema-lint code is attached, render code + tier // so operators see at-a-glance the kind of risk (destructive // / validated / safe) — not just the rule identifier. @@ -1550,10 +1553,10 @@ async fn execute_query_lint( )); } - let has_repo_target = + let has_graph_target = cli_uri.is_some() || cli_target.is_some() || config.cli_graph_name().is_some(); - if !has_repo_target { - bail!("query lint requires --schema or a resolvable repo target"); + if !has_graph_target { + bail!("query lint requires --schema or a resolvable graph target"); } let uri = resolve_local_uri(config, cli_uri, cli_target, "query lint")?; @@ -1562,7 +1565,7 @@ async fn execute_query_lint( &db.catalog(), &query_source, query_path, - QueryLintSchemaSource::repo(uri), + QueryLintSchemaSource::graph(uri), )) } @@ -1806,7 +1809,7 @@ async fn main() -> Result<()> { } Command::Init { schema, uri } => { let schema_source = fs::read_to_string(&schema)?; - ensure_local_repo_parent(&uri)?; + ensure_local_graph_parent(&uri)?; Omnigraph::init(&uri, &schema_source).await?; scaffold_config_if_missing(&uri)?; println!("initialized {}", uri); @@ -2589,17 +2592,16 @@ async fn main() -> Result<()> { let config = load_cli_config(config.as_ref())?; let uri = resolve_uri(&config, uri, target.as_deref())?; - let older_than_dur = older_than - .as_deref() - .map(parse_duration_arg) - .transpose()?; + let older_than_dur = older_than.as_deref().map(parse_duration_arg).transpose()?; if keep.is_none() && older_than_dur.is_none() { bail!("cleanup requires at least one of --keep or --older-than"); } let policy_desc = match (keep, older_than_dur) { - (Some(k), Some(d)) => format!("keep {} versions, remove anything older than {:?}", k, d), + (Some(k), Some(d)) => { + format!("keep {} versions, remove anything older than {:?}", k, d) + } (Some(k), None) => format!("keep {} versions", k), (None, Some(d)) => format!("remove anything older than {:?}", d), _ => unreachable!(), diff --git 
a/crates/omnigraph-cli/tests/cli.rs b/crates/omnigraph-cli/tests/cli.rs index d44fd32..a93ef1f 100644 --- a/crates/omnigraph-cli/tests/cli.rs +++ b/crates/omnigraph-cli/tests/cli.rs @@ -1,6 +1,6 @@ use std::fs; -use lance_index::traits::DatasetIndexExt; +use lance::index::DatasetIndexExt; use omnigraph::db::{Omnigraph, ReadTarget}; use serde_json::Value; use tempfile::tempdir; @@ -48,9 +48,9 @@ cases: expect: deny "#; -fn manifest_dataset_version(repo: &std::path::Path) -> u64 { +fn manifest_dataset_version(graph: &std::path::Path) -> u64 { tokio::runtime::Runtime::new().unwrap().block_on(async { - Omnigraph::open(repo.to_string_lossy().as_ref()) + Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap() .snapshot_of(ReadTarget::branch("main")) @@ -67,7 +67,7 @@ fn write_policy_config_fixture(root: &std::path::Path) -> (std::path::PathBuf, s &config, r#" project: - name: policy-test-repo + name: policy-test-graph policy: file: ./policy.yaml "#, @@ -221,26 +221,26 @@ fn embed_seed_preserves_non_entity_rows() { } #[test] -fn init_creates_repo_successfully_on_missing_local_directory() { +fn init_creates_graph_successfully_on_missing_local_directory() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema = fixture("test.pg"); - let output = output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo)); + let output = output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph)); let stdout = stdout_string(&output); assert!(stdout.contains("initialized")); - assert!(repo.join("_schema.pg").exists()); - assert!(repo.join("__manifest").exists()); + assert!(graph.join("_schema.pg").exists()); + assert!(graph.join("__manifest").exists()); assert!(temp.path().join("omnigraph.yaml").exists()); } #[test] fn schema_plan_json_reports_supported_additive_change() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let 
schema_path = temp.path().join("next.pg"); - init_repo(&repo); + init_graph(&graph); let next_schema = fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -255,7 +255,7 @@ fn schema_plan_json_reports_supported_additive_change() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -270,9 +270,9 @@ fn schema_plan_json_reports_supported_additive_change() { #[test] fn schema_plan_json_reports_unsupported_type_change() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("breaking.pg"); - init_repo(&repo); + init_graph(&graph); let breaking_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -286,7 +286,7 @@ fn schema_plan_json_reports_unsupported_type_change() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -303,9 +303,9 @@ fn schema_plan_json_reports_unsupported_type_change() { #[test] fn schema_apply_json_applies_supported_migration() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("next.pg"); - init_repo(&repo); + init_graph(&graph); let next_schema = fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -320,7 +320,7 @@ fn schema_apply_json_applies_supported_migration() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -330,7 +330,7 @@ fn schema_apply_json_applies_supported_migration() { let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.to_string_lossy().as_ref())) .unwrap(); assert!( 
db.catalog().node_types["Person"] @@ -342,9 +342,9 @@ fn schema_apply_json_applies_supported_migration() { #[test] fn schema_apply_human_reports_noop() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = fixture("test.pg"); - init_repo(&repo); + init_graph(&graph); let output = output_success( cli() @@ -352,7 +352,7 @@ fn schema_apply_human_reports_noop() { .arg("apply") .arg("--schema") .arg(&schema_path) - .arg(&repo), + .arg(&graph), ); let stdout = stdout_string(&output); @@ -363,9 +363,9 @@ fn schema_apply_human_reports_noop() { #[test] fn schema_apply_json_renames_type_and_updates_snapshot() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("rename.pg"); - init_repo(&repo); + init_graph(&graph); let renamed_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -384,14 +384,14 @@ fn schema_apply_json_renames_type_and_updates_snapshot() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.to_string_lossy().as_ref())) .unwrap(); let snapshot = tokio::runtime::Runtime::new() .unwrap() @@ -404,9 +404,9 @@ fn schema_apply_json_renames_type_and_updates_snapshot() { #[test] fn schema_apply_json_renames_property_and_updates_catalog() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("rename-property.pg"); - init_repo(&repo); + init_graph(&graph); let renamed_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -420,14 +420,14 @@ fn schema_apply_json_renames_property_and_updates_catalog() { .arg("--schema") 
.arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.to_string_lossy().as_ref())) .unwrap(); let person = &db.catalog().node_types["Person"]; assert!(person.properties.contains_key("years")); @@ -437,12 +437,12 @@ fn schema_apply_json_renames_property_and_updates_catalog() { #[test] fn schema_apply_json_adds_index_for_existing_property() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("index.pg"); - init_repo(&repo); + init_graph(&graph); let before_index_count = tokio::runtime::Runtime::new().unwrap().block_on(async { - let db = Omnigraph::open(repo.to_string_lossy().as_ref()) + let db = Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap(); let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); @@ -462,13 +462,13 @@ fn schema_apply_json_adds_index_for_existing_property() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); let after_index_count = tokio::runtime::Runtime::new().unwrap().block_on(async { - let db = Omnigraph::open(repo.to_string_lossy().as_ref()) + let db = Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap(); let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); @@ -481,9 +481,9 @@ fn schema_apply_json_adds_index_for_existing_property() { #[test] fn schema_apply_rejects_unsupported_plan() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("breaking.pg"); - init_repo(&repo); + 
init_graph(&graph); let breaking_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -496,7 +496,7 @@ fn schema_apply_rejects_unsupported_plan() { .arg("apply") .arg("--schema") .arg(&schema_path) - .arg(&repo), + .arg(&graph), ); let stderr = String::from_utf8_lossy(&output.stderr); assert!(stderr.contains("changing property type")); @@ -505,9 +505,9 @@ fn schema_apply_rejects_unsupported_plan() { #[test] fn schema_apply_rejects_when_non_main_branch_exists() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("next.pg"); - init_repo(&repo); + init_graph(&graph); output_success( cli() .arg("branch") @@ -515,7 +515,7 @@ fn schema_apply_rejects_when_non_main_branch_exists() { .arg("--from") .arg("main") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature"), ); @@ -531,10 +531,10 @@ fn schema_apply_rejects_when_non_main_branch_exists() { .arg("apply") .arg("--schema") .arg(&schema_path) - .arg(&repo), + .arg(&graph), ); let stderr = String::from_utf8_lossy(&output.stderr); - assert!(stderr.contains("schema apply requires a repo with only main")); + assert!(stderr.contains("schema apply requires a graph with only main")); } #[test] @@ -757,11 +757,11 @@ fn deprecated_read_and_change_subcommands_emit_warnings() { } #[test] -fn query_lint_can_use_local_repo_via_positional_uri() { +fn query_lint_can_use_local_graph_via_positional_uri() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let query_path = temp.path().join("queries.gq"); - init_repo(&repo); + init_graph(&graph); write_query_file( &query_path, r#" @@ -779,24 +779,24 @@ query list_people() { .arg("--query") .arg(&query_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["status"], "ok"); - assert_eq!(payload["schema_source"]["kind"], "repo"); + 
assert_eq!(payload["schema_source"]["kind"], "graph"); assert_eq!( payload["schema_source"]["uri"].as_str(), - Some(repo.to_string_lossy().as_ref()) + Some(graph.to_string_lossy().as_ref()) ); } #[test] -fn query_lint_can_resolve_repo_and_query_from_config() { +fn query_lint_can_resolve_graph_and_query_from_config() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config_path = temp.path().join("omnigraph.yaml"); - init_repo(&repo); + init_graph(&graph); write_query_file( &temp.path().join("queries.gq"), r#" @@ -806,7 +806,7 @@ query list_people() { } "#, ); - write_config(&config_path, &local_yaml_config(&repo)); + write_config(&config_path, &local_yaml_config(&graph)); let output = output_success( cli() @@ -821,10 +821,10 @@ query list_people() { let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["status"], "ok"); - assert_eq!(payload["schema_source"]["kind"], "repo"); + assert_eq!(payload["schema_source"]["kind"], "graph"); assert_eq!( payload["schema_source"]["uri"].as_str(), - Some(repo.to_string_lossy().as_ref()) + Some(graph.to_string_lossy().as_ref()) ); } @@ -852,12 +852,12 @@ query list_people() { ); let stderr = String::from_utf8_lossy(&output.stderr); assert!( - stderr.contains("query lint is only supported against local repo URIs in this milestone") + stderr.contains("query lint is only supported against local graph URIs in this milestone") ); } #[test] -fn query_lint_requires_schema_or_resolvable_repo_target() { +fn query_lint_requires_schema_or_resolvable_graph_target() { let temp = tempdir().unwrap(); let query_path = temp.path().join("queries.gq"); write_query_file( @@ -879,7 +879,7 @@ query list_people() { ); let stderr = String::from_utf8_lossy(&output.stderr); assert!( - stderr.contains("query lint requires --schema or a resolvable repo target") + stderr.contains("query lint requires --schema or a resolvable graph target") ); } @@ -971,8 +971,8 
@@ query bad_update($slug: String) { #[test] fn load_json_outputs_summary_for_main_branch() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); let data = fixture("test.jsonl"); let output = output_success( @@ -981,7 +981,7 @@ fn load_json_outputs_summary_for_main_branch() { .arg("--data") .arg(&data) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -996,16 +996,16 @@ fn load_json_outputs_summary_for_main_branch() { #[test] fn load_into_feature_branch_with_merge_mode_succeeds() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1026,7 +1026,7 @@ fn load_into_feature_branch_with_merge_mode_succeeds() { .arg("feature") .arg("--mode") .arg("merge") - .arg(&repo), + .arg(&graph), ); let stdout = stdout_string(&output); @@ -1038,15 +1038,15 @@ fn load_into_feature_branch_with_merge_mode_succeeds() { #[test] fn read_json_outputs_rows_for_named_query() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let queries = fixture("test.gq"); let output = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -1066,16 +1066,16 @@ fn read_json_outputs_rows_for_named_query() { #[test] fn export_jsonl_outputs_source_rows_for_selected_branch_and_type() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + 
load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1095,13 +1095,13 @@ fn export_jsonl_outputs_source_rows_for_selected_branch_and_type() { .arg("feature") .arg("--mode") .arg("append") - .arg(&repo), + .arg(&graph), ); let output = output_success( cli() .arg("export") - .arg(&repo) + .arg(&graph) .arg("--branch") .arg("feature") .arg("--type") @@ -1150,7 +1150,7 @@ fn policy_validate_fails_for_invalid_policy_file() { &config, r#" project: - name: policy-test-repo + name: policy-test-graph policy: file: ./policy.yaml "#, @@ -1242,11 +1242,11 @@ fn policy_explain_reports_decision_and_matched_rule() { #[test] fn read_can_resolve_uri_from_config() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); - init_repo(&repo); - load_fixture(&repo); - write_config(&config, &local_yaml_config(&repo)); + init_graph(&graph); + load_fixture(&graph); + write_config(&config, &local_yaml_config(&graph)); let output = output_success( cli() @@ -1268,11 +1268,11 @@ fn read_can_resolve_uri_from_config() { #[test] fn read_alias_from_yaml_config_runs_with_kv_output() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); let query = temp.path().join("aliases.gq"); - init_repo(&repo); - load_fixture(&repo); + init_graph(&graph); + load_fixture(&graph); write_query_file( &query, &std::fs::read_to_string(fixture("test.gq")).unwrap(), @@ -1281,7 +1281,7 @@ fn read_alias_from_yaml_config_runs_with_kv_output() { &config, &format!( "{}aliases:\n owner:\n command: read\n query: aliases.gq\n name: get_person\n args: [name]\n format: kv\n", - local_yaml_config(&repo) + local_yaml_config(&graph) ), ); @@ -1303,16 +1303,16 @@ fn read_alias_from_yaml_config_runs_with_kv_output() { 
#[test] fn read_alias_uses_alias_target_without_cli_default_and_accepts_url_like_arg() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); let query = temp.path().join("aliases.gq"); let data = temp.path().join("url-like.jsonl"); - init_repo(&repo); + init_graph(&graph); write_jsonl( &data, r#"{"type":"Person","data":{"name":"https://example.com","age":30}}"#, ); - output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph)); write_query_file( &query, &std::fs::read_to_string(fixture("test.gq")).unwrap(), @@ -1321,7 +1321,7 @@ fn read_alias_uses_alias_target_without_cli_default_and_accepts_url_like_arg() { &config, &format!( "graphs:\n local:\n uri: '{}'\nquery:\n roots:\n - .\npolicy: {{}}\naliases:\n owner:\n command: read\n query: aliases.gq\n name: get_person\n args: [name]\n graph: local\n format: kv\n", - repo.to_string_lossy() + graph.to_string_lossy() ), ); @@ -1343,11 +1343,11 @@ fn read_alias_uses_alias_target_without_cli_default_and_accepts_url_like_arg() { #[test] fn change_alias_from_yaml_config_persists_changes() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); let query = temp.path().join("mutations.gq"); - init_repo(&repo); - load_fixture(&repo); + init_graph(&graph); + load_fixture(&graph); write_query_file( &query, r#" @@ -1360,7 +1360,7 @@ query insert_person($name: String, $age: I32) { &config, &format!( "{}aliases:\n add_person:\n command: change\n query: mutations.gq\n name: insert_person\n args: [name, age]\n", - local_yaml_config(&repo) + local_yaml_config(&graph) ), ); @@ -1381,7 +1381,7 @@ query insert_person($name: String, $age: I32) { let verify = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") 
.arg(fixture("test.gq")) .arg("--name") @@ -1397,14 +1397,14 @@ query insert_person($name: String, $age: I32) { #[test] fn read_csv_format_outputs_header_and_row_values() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let output = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1423,14 +1423,14 @@ fn read_csv_format_outputs_header_and_row_values() { #[test] fn read_jsonl_format_outputs_metadata_header_first() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let output = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1449,9 +1449,9 @@ fn read_jsonl_format_outputs_metadata_header_first() { #[test] fn change_json_outputs_affected_counts_and_persists() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let mutation_file = temp.path().join("mutations.gq"); write_query_file( &mutation_file, @@ -1465,7 +1465,7 @@ query insert_person($name: String, $age: I32) { let output = output_success( cli() .arg("change") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&mutation_file) .arg("--params") @@ -1481,7 +1481,7 @@ query insert_person($name: String, $age: I32) { let verify = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1498,11 +1498,11 @@ query insert_person($name: String, $age: I32) { #[test] fn change_can_resolve_uri_and_branch_from_config() { let temp = tempdir().unwrap(); - let repo = 
repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); - init_repo(&repo); - load_fixture(&repo); - write_config(&config, &local_yaml_config(&repo)); + init_graph(&graph); + load_fixture(&graph); + write_config(&config, &local_yaml_config(&graph)); let mutation_file = temp.path().join("config-mutations.gq"); write_query_file( &mutation_file, @@ -1532,14 +1532,14 @@ query insert_person($name: String, $age: I32) { #[test] fn read_requires_name_for_multi_query_files() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let output = output_failure( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")), ); @@ -1550,8 +1550,8 @@ fn read_requires_name_for_multi_query_files() { #[test] fn read_supports_inline_query_string() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let repo = graph_path(temp.path()); + init_graph(&repo); load_fixture(&repo); let output = output_success( @@ -1573,8 +1573,8 @@ fn read_supports_inline_query_string() { #[test] fn change_supports_inline_query_string() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let repo = graph_path(temp.path()); + init_graph(&repo); load_fixture(&repo); let output = output_success( @@ -1608,8 +1608,8 @@ fn change_supports_inline_query_string() { #[test] fn read_rejects_query_string_combined_with_query() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let repo = graph_path(temp.path()); + init_graph(&repo); load_fixture(&repo); let output = output_failure( @@ -1631,8 +1631,8 @@ fn read_rejects_query_string_combined_with_query() { #[test] fn read_rejects_empty_query_string() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - 
init_repo(&repo); + let repo = graph_path(temp.path()); + init_graph(&repo); load_fixture(&repo); let output = output_failure(cli().arg("read").arg(&repo).arg("-e").arg("")); @@ -1646,15 +1646,15 @@ fn read_rejects_empty_query_string() { #[test] fn branch_create_json_outputs_source_and_name() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); let output = output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature") @@ -1664,21 +1664,21 @@ fn branch_create_json_outputs_source_and_name() { assert_eq!(payload["from"], "main"); assert_eq!(payload["name"], "feature"); - assert_eq!(payload["uri"], repo.to_string_lossy().as_ref()); + assert_eq!(payload["uri"], graph.to_string_lossy().as_ref()); } #[test] fn branch_list_outputs_sorted_branches() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("zeta"), @@ -1688,13 +1688,13 @@ fn branch_list_outputs_sorted_branches() { .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("alpha"), ); - let output = output_success(cli().arg("branch").arg("list").arg("--uri").arg(&repo)); + let output = output_success(cli().arg("branch").arg("list").arg("--uri").arg(&graph)); let stdout = stdout_string(&output); let lines = stdout .lines() @@ -1708,15 +1708,15 @@ fn branch_list_outputs_sorted_branches() { #[test] fn branch_delete_json_outputs_name_and_removes_branch() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - 
.arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1727,15 +1727,15 @@ fn branch_delete_json_outputs_name_and_removes_branch() { .arg("branch") .arg("delete") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature") .arg("--json"), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["name"], "feature"); - assert_eq!(payload["uri"], repo.to_string_lossy().as_ref()); + assert_eq!(payload["uri"], graph.to_string_lossy().as_ref()); - let listed = output_success(cli().arg("branch").arg("list").arg("--uri").arg(&repo)); + let listed = output_success(cli().arg("branch").arg("list").arg("--uri").arg(&graph)); let stdout = stdout_string(&listed); let lines = stdout .lines() @@ -1748,15 +1748,15 @@ fn branch_delete_json_outputs_name_and_removes_branch() { #[test] fn branch_delete_rejects_main() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); let output = output_failure( cli() .arg("branch") .arg("delete") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("main"), ); let stderr = String::from_utf8(output.stderr).unwrap(); @@ -1766,16 +1766,16 @@ fn branch_delete_rejects_main() { #[test] fn branch_merge_defaults_target_to_main() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1795,7 +1795,7 @@ fn branch_merge_defaults_target_to_main() { .arg("feature") .arg("--mode") .arg("append") - .arg(&repo), + .arg(&graph), ); let merge_output = output_success( @@ -1803,7 +1803,7 @@ fn branch_merge_defaults_target_to_main() { .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature") .arg("--json"), ); @@ 
-1815,7 +1815,7 @@ fn branch_merge_defaults_target_to_main() { let snapshot_output = output_success( cli() .arg("snapshot") - .arg(&repo) + .arg(&graph) .arg("--branch") .arg("main") .arg("--json"), @@ -1835,16 +1835,16 @@ fn branch_merge_defaults_target_to_main() { #[test] fn branch_merge_supports_explicit_target() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1854,7 +1854,7 @@ fn branch_merge_supports_explicit_target() { .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("experiment"), @@ -1874,7 +1874,7 @@ fn branch_merge_supports_explicit_target() { .arg("feature") .arg("--mode") .arg("append") - .arg(&repo), + .arg(&graph), ); let merge_output = output_success( @@ -1882,7 +1882,7 @@ fn branch_merge_supports_explicit_target() { .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature") .arg("--into") .arg("experiment") @@ -1896,17 +1896,17 @@ fn branch_merge_supports_explicit_target() { #[test] fn snapshot_json_returns_manifest_version_and_tables() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); - let output = output_success(cli().arg("snapshot").arg(&repo).arg("--json")); + let output = output_success(cli().arg("snapshot").arg(&graph).arg("--json")); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["branch"], "main"); assert_eq!( payload["manifest_version"].as_u64().unwrap(), - manifest_dataset_version(&repo) + manifest_dataset_version(&graph) ); assert!(payload["tables"].as_array().unwrap().len() >= 
4); } @@ -1976,11 +1976,11 @@ fn read_embedded_rows(path: std::path::PathBuf) -> Vec { #[test] fn snapshot_can_resolve_uri_from_config() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); - init_repo(&repo); - load_fixture(&repo); - write_config(&config, &local_yaml_config(&repo)); + init_graph(&graph); + load_fixture(&graph); + write_config(&config, &local_yaml_config(&graph)); let output = output_success( cli() @@ -1996,11 +1996,11 @@ fn snapshot_can_resolve_uri_from_config() { #[test] fn snapshot_human_output_includes_branch_and_table_summaries() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); - let output = output_success(cli().arg("snapshot").arg(&repo)); + let output = output_success(cli().arg("snapshot").arg(&graph)); let stdout = stdout_string(&output); assert!(stdout.contains("branch: main")); @@ -2012,11 +2012,11 @@ fn snapshot_human_output_includes_branch_and_table_summaries() { #[test] fn commit_show_accepts_long_uri_flag() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); - let list = output_success(cli().arg("commit").arg("list").arg(&repo).arg("--json")); + let list = output_success(cli().arg("commit").arg("list").arg(&graph).arg("--json")); let list_payload: Value = serde_json::from_slice(&list.stdout).unwrap(); let commit_id = list_payload["commits"][0]["graph_commit_id"] .as_str() @@ -2028,7 +2028,7 @@ fn commit_show_accepts_long_uri_flag() { .arg("commit") .arg("show") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg(&commit_id) .arg("--json"), ); @@ -2039,11 +2039,11 @@ fn commit_show_accepts_long_uri_flag() { } #[test] -fn 
cli_fails_for_missing_repo() { +fn cli_fails_for_missing_graph() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); - let output = output_failure(cli().arg("snapshot").arg(&repo)); + let output = output_failure(cli().arg("snapshot").arg(&graph)); let stderr = String::from_utf8(output.stderr).unwrap(); assert!( stderr.contains("_schema.pg") @@ -2055,7 +2055,7 @@ fn cli_fails_for_missing_repo() { #[test] fn cli_fails_for_missing_schema_or_data_file() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let missing_schema = temp.path().join("missing.pg"); let missing_data = temp.path().join("missing.jsonl"); @@ -2064,7 +2064,7 @@ fn cli_fails_for_missing_schema_or_data_file() { .arg("init") .arg("--schema") .arg(&missing_schema) - .arg(&repo), + .arg(&graph), ); assert!( String::from_utf8(init_output.stderr) @@ -2072,13 +2072,13 @@ fn cli_fails_for_missing_schema_or_data_file() { .contains("No such file") ); - init_repo(&repo); + init_graph(&graph); let load_output = output_failure( cli() .arg("load") .arg("--data") .arg(&missing_data) - .arg(&repo), + .arg(&graph), ); assert!( String::from_utf8(load_output.stderr) @@ -2090,16 +2090,16 @@ fn cli_fails_for_missing_schema_or_data_file() { #[test] fn cli_fails_for_invalid_merge_requests() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let missing_branch = output_failure( cli() .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("missing"), ); let missing_branch_stderr = String::from_utf8(missing_branch.stderr).unwrap(); @@ -2114,7 +2114,7 @@ fn cli_fails_for_invalid_merge_requests() { .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("main") .arg("--into") .arg("main"), @@ -2142,9 +2142,9 @@ fn 
cli_fails_for_invalid_merge_requests() { #[test] fn schema_apply_allow_data_loss_flag_promotes_drops_to_hard() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("drop-age.pg"); - init_repo(&repo); + init_graph(&graph); // Drop the nullable `age` column. let next_schema = fs::read_to_string(fixture("test.pg")) @@ -2160,7 +2160,7 @@ fn schema_apply_allow_data_loss_flag_promotes_drops_to_hard() { .arg(&schema_path) .arg("--allow-data-loss") .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); @@ -2183,9 +2183,9 @@ fn schema_apply_without_allow_data_loss_keeps_soft_drops() { // drops stay Soft. Pins default semantics against accidental Hard // promotion if a future refactor changes the option threading. let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("drop-age-soft.pg"); - init_repo(&repo); + init_graph(&graph); let next_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -2199,7 +2199,7 @@ fn schema_apply_without_allow_data_loss_keeps_soft_drops() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); @@ -2225,8 +2225,8 @@ fn schema_plan_parity_cli_and_sdk() { // the HTTP soft/hard drop tests, which exercise apply with // identical fixtures. 
let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); let schema_path = temp.path().join("plan-parity.pg"); let next_schema = fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -2242,13 +2242,13 @@ fn schema_plan_parity_cli_and_sdk() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let cli_payload: Value = serde_json::from_slice(&cli_output.stdout).unwrap(); - // SDK side: open repo, call plan_schema. + // SDK side: open graph, call plan_schema. let plan = tokio::runtime::Runtime::new().unwrap().block_on(async { - let db = Omnigraph::open(repo.to_string_lossy().as_ref()) + let db = Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap(); db.plan_schema(&next_schema).await.unwrap() diff --git a/crates/omnigraph-cli/tests/support/mod.rs b/crates/omnigraph-cli/tests/support/mod.rs index 31092ea..b62d861 100644 --- a/crates/omnigraph-cli/tests/support/mod.rs +++ b/crates/omnigraph-cli/tests/support/mod.rs @@ -52,7 +52,7 @@ pub fn fixture(name: &str) -> PathBuf { .join(name) } -pub fn repo_path(root: &Path) -> PathBuf { +pub fn graph_path(root: &Path) -> PathBuf { root.join("demo.omni") } @@ -86,14 +86,14 @@ pub fn parse_stdout_json(output: &Output) -> Value { serde_json::from_slice(&output.stdout).unwrap() } -pub fn init_repo(repo: &Path) { +pub fn init_graph(graph: &Path) { let schema = fixture("test.pg"); - output_success(cli().arg("init").arg("--schema").arg(&schema).arg(repo)); + output_success(cli().arg("init").arg("--schema").arg(&schema).arg(graph)); } -pub fn load_fixture(repo: &Path) { +pub fn load_fixture(graph: &Path) { let data = fixture("test.jsonl"); - output_success(cli().arg("load").arg("--data").arg(&data).arg(repo)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(graph)); } pub fn write_jsonl(path: &Path, rows: &str) { @@ -116,7 +116,7 @@ fn yaml_string(value: &str) 
-> String { format!("'{}'", value.replace('\'', "''")) } -pub fn local_yaml_config(repo: &Path) -> String { +pub fn local_yaml_config(graph: &Path) -> String { format!( "\ graphs: @@ -130,7 +130,7 @@ query: - . policy: {{}} ", - yaml_string(&repo.to_string_lossy()) + yaml_string(&graph.to_string_lossy()) ) } @@ -200,9 +200,9 @@ fn spawn_server_process(mut command: StdCommand) -> TestServer { panic!("server did not become healthy"); } -pub fn spawn_server(repo: &Path) -> TestServer { +pub fn spawn_server(graph: &Path) -> TestServer { let mut command = server_process(); - command.arg(repo); + command.arg(graph); spawn_server_process(command) } @@ -221,58 +221,57 @@ pub fn spawn_server_with_config_env(config: &Path, envs: &[(&str, &str)]) -> Tes spawn_server_process(command) } - -pub struct SystemRepo { +pub struct SystemGraph { _temp: TempDir, - repo: PathBuf, + graph: PathBuf, } -impl SystemRepo { +impl SystemGraph { pub fn initialized() -> Self { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - Self { _temp: temp, repo } + let graph = graph_path(temp.path()); + init_graph(&graph); + Self { _temp: temp, graph } } pub fn loaded() -> Self { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); - Self { _temp: temp, repo } + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); + Self { _temp: temp, graph } } pub fn path(&self) -> &Path { - &self.repo + &self.graph } pub fn write_query(&self, name: &str, source: &str) -> PathBuf { - let path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_query_file(&path, source); path } pub fn write_jsonl(&self, name: &str, rows: &str) -> PathBuf { - let path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_jsonl(&path, rows); path } pub fn write_config(&self, name: &str, source: &str) -> PathBuf { - let 
path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_config(&path, source); path } pub fn write_file(&self, name: &str, source: &str) -> PathBuf { - let path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_file(&path, source); path } pub fn spawn_server(&self) -> TestServer { - spawn_server(&self.repo) + spawn_server(&self.graph) } pub fn spawn_server_with_config(&self, config: &Path) -> TestServer { diff --git a/crates/omnigraph-cli/tests/system_local.rs b/crates/omnigraph-cli/tests/system_local.rs index 882221f..074b203 100644 --- a/crates/omnigraph-cli/tests/system_local.rs +++ b/crates/omnigraph-cli/tests/system_local.rs @@ -66,7 +66,7 @@ fn yaml_string(value: &str) -> String { format!("'{}'", value.replace('\'', "''")) } -fn local_policy_config(repo: &SystemRepo) -> String { +fn local_policy_config(graph: &SystemGraph) -> String { format!( "\ project: @@ -83,12 +83,12 @@ query: policy: file: ./policy.yaml ", - yaml_string(&repo.path().to_string_lossy()) + yaml_string(&graph.path().to_string_lossy()) ) } -fn insert_person_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf { - repo.write_query( +fn insert_person_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf { + graph.write_query( name, r#" query insert_person($name: String, $age: I32) { @@ -98,8 +98,8 @@ query insert_person($name: String, $age: I32) { ) } -fn add_friend_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf { - repo.write_query( +fn add_friend_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf { + graph.write_query( name, r#" query add_friend($from: String, $to: String) { @@ -109,13 +109,13 @@ query add_friend($from: String, $to: String) { ) } -fn snapshot_table_row_count(repo: &SystemRepo, table_key: &str) -> u64 { - snapshot_table_row_count_at(repo.path(), table_key) +fn snapshot_table_row_count(graph: &SystemGraph, table_key: &str) -> u64 { + 
snapshot_table_row_count_at(graph.path(), table_key) } -fn snapshot_table_row_count_at(repo: &std::path::Path, table_key: &str) -> u64 { +fn snapshot_table_row_count_at(graph: &std::path::Path, table_key: &str) -> u64 { let payload = parse_stdout_json(&output_success( - cli().arg("snapshot").arg(repo).arg("--json"), + cli().arg("snapshot").arg(graph).arg("--json"), )); payload["tables"] .as_array() @@ -178,7 +178,7 @@ fn format_vector(values: &[f32]) -> String { .join(", ") } -fn s3_test_repo_uri(suite: &str) -> Option { +fn s3_test_graph_uri(suite: &str) -> Option { let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?; let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX") .ok() @@ -193,21 +193,21 @@ fn s3_test_repo_uri(suite: &str) -> Option { #[test] fn local_cli_end_to_end_init_load_read_change_read_flow() { - let repo = SystemRepo::initialized(); - let mutation_file = insert_person_query(&repo, "system-local-init-change.gq"); + let graph = SystemGraph::initialized(); + let mutation_file = insert_person_query(&graph, "system-local-init-change.gq"); output_success( cli() .arg("load") .arg("--data") .arg(fixture("test.jsonl")) - .arg(repo.path()), + .arg(graph.path()), ); let read_before = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -222,7 +222,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { let change_payload = parse_stdout_json(&output_success( cli() .arg("change") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(&mutation_file) .arg("--params") @@ -235,7 +235,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { let read_after = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -253,7 +253,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { let inline_change = parse_stdout_json(&output_success( 
cli() .arg("change") - .arg(repo.path()) + .arg(graph.path()) .arg("-e") .arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }") .arg("--params") @@ -267,7 +267,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { let inline_read = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query-string") .arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }") .arg("--params") @@ -281,15 +281,15 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { #[test] fn local_cli_end_to_end_branch_change_merge_flow() { - let repo = SystemRepo::loaded(); - let mutation_file = insert_person_query(&repo, "system-local-change.gq"); + let graph = SystemGraph::loaded(); + let mutation_file = insert_person_query(&graph, "system-local-change.gq"); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("--from") .arg("main") .arg("feature"), @@ -298,7 +298,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { let change_payload = parse_stdout_json(&output_success( cli() .arg("change") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(&mutation_file) .arg("--branch") @@ -313,7 +313,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { let feature_read = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -332,7 +332,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { .arg("branch") .arg("merge") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("feature") .arg("--json"), )); @@ -341,7 +341,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { let main_read = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -358,7 +358,7 @@ fn 
local_cli_end_to_end_branch_change_merge_flow() { cli() .arg("commit") .arg("list") - .arg(repo.path()) + .arg(graph.path()) .arg("--branch") .arg("main") .arg("--json"), @@ -368,8 +368,8 @@ fn local_cli_end_to_end_branch_change_merge_flow() { #[test] fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { - let repo = SystemRepo::loaded(); - let ingest_data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let ingest_data = graph.write_jsonl( "system-local-ingest.jsonl", r#"{"type":"Person","data":{"name":"Zoe","age":33}} {"type":"Person","data":{"name":"Bob","age":26}}"#, @@ -382,7 +382,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { .arg(&ingest_data) .arg("--branch") .arg("feature-ingest") - .arg(repo.path()) + .arg(graph.path()) .arg("--json"), )); assert_eq!(ingest_payload["branch"], "feature-ingest"); @@ -395,7 +395,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { let feature_snapshot = parse_stdout_json(&output_success( cli() .arg("snapshot") - .arg(repo.path()) + .arg(graph.path()) .arg("--branch") .arg("feature-ingest") .arg("--json"), @@ -405,7 +405,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { let zoe = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -422,7 +422,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { let bob = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -439,20 +439,20 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { #[test] fn local_cli_export_round_trips_full_branch_graph() { - let repo = SystemRepo::loaded(); + let graph = SystemGraph::loaded(); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("--from") .arg("main") 
.arg("feature"), ); - let feature_data = repo.write_jsonl( + let feature_data = graph.write_jsonl( "system-local-export-feature.jsonl", r#"{"type":"Person","data":{"name":"Eve","age":29}} {"edge":"Knows","from":"Alice","to":"Eve"}"#, @@ -466,53 +466,56 @@ fn local_cli_export_round_trips_full_branch_graph() { .arg("feature") .arg("--mode") .arg("append") - .arg(repo.path()), + .arg(graph.path()), ); let exported = stdout_string(&output_success( cli() .arg("export") - .arg(repo.path()) + .arg(graph.path()) .arg("--branch") .arg("feature") .arg("--jsonl"), )); - let export_path = repo.write_jsonl("system-local-exported.jsonl", &exported); - let imported_repo = repo.path().parent().unwrap().join("imported-export.omni"); + let export_path = graph.write_jsonl("system-local-exported.jsonl", &exported); + let imported_graph = graph.path().parent().unwrap().join("imported-export.omni"); output_success( cli() .arg("init") .arg("--schema") .arg(fixture("test.pg")) - .arg(&imported_repo), + .arg(&imported_graph), ); output_success( cli() .arg("load") .arg("--data") .arg(&export_path) - .arg(&imported_repo), + .arg(&imported_graph), ); assert_eq!( - snapshot_table_row_count_at(&imported_repo, "node:Person"), + snapshot_table_row_count_at(&imported_graph, "node:Person"), 5 ); assert_eq!( - snapshot_table_row_count_at(&imported_repo, "node:Company"), + snapshot_table_row_count_at(&imported_graph, "node:Company"), 2 ); - assert_eq!(snapshot_table_row_count_at(&imported_repo, "edge:Knows"), 4); assert_eq!( - snapshot_table_row_count_at(&imported_repo, "edge:WorksAt"), + snapshot_table_row_count_at(&imported_graph, "edge:Knows"), + 4 + ); + assert_eq!( + snapshot_table_row_count_at(&imported_graph, "edge:WorksAt"), 2 ); let eve = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&imported_repo) + .arg(&imported_graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -527,7 +530,7 @@ fn local_cli_export_round_trips_full_branch_graph() { let friends = 
parse_stdout_json(&output_success( cli() .arg("read") - .arg(&imported_repo) + .arg(&imported_graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -541,7 +544,7 @@ fn local_cli_export_round_trips_full_branch_graph() { #[test] fn local_cli_s3_end_to_end_init_load_read_flow() { - let Some(repo_uri) = s3_test_repo_uri("cli-local") else { + let Some(graph_uri) = s3_test_graph_uri("cli-local") else { eprintln!("skipping s3 cli test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -566,7 +569,7 @@ query: - . policy: {{}} ", - repo_uri + graph_uri ), ); @@ -575,14 +578,14 @@ policy: {{}} .arg("init") .arg("--schema") .arg(fixture("test.pg")) - .arg(&repo_uri), + .arg(&graph_uri), ); output_success( cli() .arg("load") .arg("--data") .arg(fixture("test.jsonl")) - .arg(&repo_uri), + .arg(&graph_uri), ); let read = parse_stdout_json(&output_success( @@ -615,13 +618,13 @@ policy: {{}} #[test] fn local_cli_failed_load_keeps_target_state_unchanged() { - let repo = SystemRepo::loaded(); - let bad_data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let bad_data = graph.write_jsonl( "system-bad-load.jsonl", r#"{"edge":"Knows","from":"Alice","to":"Missing"}"#, ); - let person_rows_before = snapshot_table_row_count(&repo, "node:Person"); - let knows_rows_before = snapshot_table_row_count(&repo, "edge:Knows"); + let person_rows_before = snapshot_table_row_count(&graph, "node:Person"); + let knows_rows_before = snapshot_table_row_count(&graph, "edge:Knows"); let output = output_failure( cli() @@ -630,17 +633,17 @@ fn local_cli_failed_load_keeps_target_state_unchanged() { .arg(&bad_data) .arg("--mode") .arg("append") - .arg(repo.path()), + .arg(graph.path()), ); let stderr = String::from_utf8(output.stderr).unwrap(); assert!(stderr.contains("not found") || stderr.contains("Missing")); assert_eq!( - snapshot_table_row_count(&repo, "node:Person"), + snapshot_table_row_count(&graph, "node:Person"), person_rows_before ); assert_eq!( - 
snapshot_table_row_count(&repo, "edge:Knows"), + snapshot_table_row_count(&graph, "edge:Knows"), knows_rows_before ); // Failed loads leave no run record (the run lifecycle has been @@ -649,13 +652,13 @@ fn local_cli_failed_load_keeps_target_state_unchanged() { #[test] fn local_cli_failed_change_keeps_target_state_unchanged() { - let repo = SystemRepo::loaded(); - let mutation_file = add_friend_query(&repo, "system-invalid-change.gq"); + let graph = SystemGraph::loaded(); + let mutation_file = add_friend_query(&graph, "system-invalid-change.gq"); let output = output_failure( cli() .arg("change") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(&mutation_file) .arg("--params") @@ -667,7 +670,7 @@ fn local_cli_failed_change_keeps_target_state_unchanged() { let friends_payload = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -683,8 +686,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() { #[test] fn local_cli_resolves_relative_query_against_config_base_dir() { - let repo = SystemRepo::loaded(); - let root = repo.path().parent().unwrap(); + let graph = SystemGraph::loaded(); + let root = graph.path().parent().unwrap(); let config_dir = root.join("config"); let query_dir = config_dir.join("queries"); let ambient_dir = root.join("ambient"); @@ -707,7 +710,7 @@ query: - queries policy: {{}} ", - repo.path().display() + graph.path().display() ), ); write_query_file( @@ -761,7 +764,7 @@ query get_person($name: String) { #[test] fn local_cli_datetime_and_list_types_round_trip_through_load_read_and_change() { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema = temp.path().join("datatypes.pg"); let data = temp.path().join("datatypes.jsonl"); let queries = temp.path().join("datatypes.gq"); @@ -836,13 +839,13 @@ query get_task($slug: String) { "#, ); - 
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo)); - output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo)); + output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph)); let filtered = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -867,7 +870,7 @@ query get_task($slug: String) { let insert_payload = parse_stdout_json(&output_success( cli() .arg("change") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -883,7 +886,7 @@ query get_task($slug: String) { let update_payload = parse_stdout_json(&output_success( cli() .arg("change") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -897,7 +900,7 @@ query get_task($slug: String) { let gamma = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -924,7 +927,7 @@ query get_task($slug: String) { #[ignore = "requires GEMINI_API_KEY and network access"] fn local_cli_real_gemini_string_nearest_query_returns_expected_match() { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema = temp.path().join("gemini.pg"); let data = temp.path().join("gemini.jsonl"); let queries = temp.path().join("gemini.gq"); @@ -966,13 +969,13 @@ query vector_search($q: String) { "#, ); - output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo)); - output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo)); + output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph)); let result = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -999,10 
+1002,10 @@ fn local_cli_policy_tooling_is_end_to_end() { // Sanity check for the read-only policy CLI surfaces. These don't // mutate the graph — they just parse and evaluate the policy file — // so they don't depend on PR #4's engine-side enforcement. - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); - repo.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML); + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML); let validate = output_success( cli() @@ -1053,10 +1056,10 @@ fn local_cli_change_enforces_engine_layer_policy() { // 3. Policy installed, `--as act-ragnor`, change on main → // Cedar permits (admins-write rule). Write succeeds and the // inserted row is readable. - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); - let mutation_file = insert_person_query(&repo, "system-local-policy-change.gq"); + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let mutation_file = insert_person_query(&graph, "system-local-policy-change.gq"); // Case 1: policy configured, no actor threaded → footgun guard. 
let no_actor = output_failure( @@ -1119,7 +1122,7 @@ fn local_cli_change_enforces_engine_layer_policy() { let verify = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1145,10 +1148,10 @@ fn local_cli_change_enforces_engine_layer_policy() { #[test] fn local_cli_load_enforces_engine_layer_policy() { - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); - let data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let data = graph.write_jsonl( "system-local-policy-load.jsonl", r#"{"type":"Person","data":{"name":"LoadPolicy","age":11}}"#, ); @@ -1189,10 +1192,10 @@ fn local_cli_load_enforces_engine_layer_policy() { #[test] fn local_cli_ingest_enforces_engine_layer_policy() { - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); - let data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let data = graph.write_jsonl( "system-local-policy-ingest.jsonl", r#"{"type":"Person","data":{"name":"IngestPolicy","age":12}}"#, ); @@ -1242,16 +1245,19 @@ fn local_cli_ingest_enforces_engine_layer_policy() { #[test] fn local_cli_schema_apply_enforces_engine_layer_policy() { - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", 
&local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); // Additive: add a nullable property; SDK-compatible with the fixture // schema. Uses the schema-apply scope (TargetBranch("main")). let new_schema = std::fs::read_to_string(fixture("test.pg")) .unwrap() - .replace(" age: I32?\n}", " age: I32?\n nickname: String?\n}"); - let schema_path = repo.path().join("policy-additive.pg"); + .replace( + " age: I32?\n}", + " age: I32?\n nickname: String?\n}", + ); + let schema_path = graph.path().join("policy-additive.pg"); std::fs::write(&schema_path, &new_schema).unwrap(); let denied = output_failure( @@ -1289,9 +1295,9 @@ fn local_cli_schema_apply_enforces_engine_layer_policy() { #[test] fn local_cli_branch_create_enforces_engine_layer_policy() { - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); let denied = output_failure( cli() @@ -1327,9 +1333,9 @@ fn local_cli_branch_create_enforces_engine_layer_policy() { #[test] fn local_cli_branch_delete_enforces_engine_layer_policy() { - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); // Pre-create the branch as ragnor so there's something to delete. 
output_success( @@ -1375,9 +1381,9 @@ fn local_cli_branch_delete_enforces_engine_layer_policy() { #[test] fn local_cli_branch_merge_enforces_engine_layer_policy() { - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); // Pre-create a feature branch as ragnor (admins-branch-ops covers it). output_success( @@ -1431,7 +1437,7 @@ fn local_cli_branch_merge_enforces_engine_layer_policy() { // pin the precedence rule that `main.rs::resolve_cli_actor` implements: // `--as` flag > `cli.actor` from `omnigraph.yaml` > None. -fn local_policy_config_with_actor(repo: &SystemRepo, actor: &str) -> String { +fn local_policy_config_with_actor(graph: &SystemGraph, actor: &str) -> String { // Mirrors `local_policy_config` but adds `cli.actor` so the // config-only precedence path is exercised. The `cli:` block // already has `graph` and `branch`; appending `actor` here. @@ -1452,7 +1458,7 @@ query: policy: file: ./policy.yaml ", - yaml_string(&repo.path().to_string_lossy()), + yaml_string(&graph.path().to_string_lossy()), actor, ) } @@ -1462,13 +1468,13 @@ fn local_cli_actor_from_config_used_when_no_flag() { // cli.actor: act-ragnor in omnigraph.yaml, no --as flag → change // permitted via admins-write rule. Proves the config-only path // works; previously the only proof was structural. 
- let repo = SystemRepo::loaded(); - let config = repo.write_config( + let graph = SystemGraph::loaded(); + let config = graph.write_config( "omnigraph-policy.yaml", - &local_policy_config_with_actor(&repo, "act-ragnor"), + &local_policy_config_with_actor(&graph, "act-ragnor"), ); - repo.write_config("policy.yaml", POLICY_E2E_YAML); - let mutation_file = insert_person_query(&repo, "system-local-cli-actor.gq"); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let mutation_file = insert_person_query(&graph, "system-local-cli-actor.gq"); let allowed = parse_stdout_json(&output_success( cli() @@ -1490,13 +1496,13 @@ fn local_cli_actor_flag_overrides_config_actor() { // cli.actor: act-ragnor in config + --as act-bruno on CLI → change // denied. Flag wins per the precedence rule. Without this test, a // future change that reverses precedence would ride through silently. - let repo = SystemRepo::loaded(); - let config = repo.write_config( + let graph = SystemGraph::loaded(); + let config = graph.write_config( "omnigraph-policy.yaml", - &local_policy_config_with_actor(&repo, "act-ragnor"), + &local_policy_config_with_actor(&graph, "act-ragnor"), ); - repo.write_config("policy.yaml", POLICY_E2E_YAML); - let mutation_file = insert_person_query(&repo, "system-local-cli-actor-override.gq"); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let mutation_file = insert_person_query(&graph, "system-local-cli-actor-override.gq"); let denied = output_failure( cli() diff --git a/crates/omnigraph-cli/tests/system_remote.rs b/crates/omnigraph-cli/tests/system_remote.rs index 48f50ab..fc43d09 100644 --- a/crates/omnigraph-cli/tests/system_remote.rs +++ b/crates/omnigraph-cli/tests/system_remote.rs @@ -41,7 +41,7 @@ fn yaml_string(value: &str) -> String { format!("'{}'", value.replace('\'', "''")) } -fn remote_policy_server_config(repo: &SystemRepo) -> String { +fn remote_policy_server_config(graph: &SystemGraph) -> String { format!( "\ project: @@ -54,7 +54,7 @@ server: 
policy: file: ./policy.yaml ", - yaml_string(&repo.path().to_string_lossy()) + yaml_string(&graph.path().to_string_lossy()) ) } @@ -81,10 +81,10 @@ auth: #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_server_and_cli_end_to_end_flow() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let mutation_file = repo.write_query( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let mutation_file = graph.write_query( "system-remote-change.gq", r#" query insert_person($name: String, $age: I32) { @@ -105,7 +105,7 @@ query insert_person($name: String, $age: I32) { assert_eq!(health["status"], "ok"); let local_snapshot = parse_stdout_json(&output_success( - cli().arg("snapshot").arg(repo.path()).arg("--json"), + cli().arg("snapshot").arg(graph.path()).arg("--json"), )); let snapshot = parse_stdout_json(&output_success( cli() @@ -120,7 +120,7 @@ query insert_person($name: String, $age: I32) { let local_read = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -180,7 +180,7 @@ query insert_person($name: String, $age: I32) { let local_verify = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -260,11 +260,11 @@ query insert_person($name: String, $age: I32) { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] -fn remote_schema_apply_via_cli_updates_repo() { - let repo = SystemRepo::initialized(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let next_schema = repo.write_file( +fn 
remote_schema_apply_via_cli_updates_graph() { + let graph = SystemGraph::initialized(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let next_schema = graph.write_file( "next.pg", &fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -286,7 +286,7 @@ fn remote_schema_apply_via_cli_updates_repo() { let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.path().to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.path().to_string_lossy().as_ref())) .unwrap(); assert!( db.catalog().node_types["Person"] @@ -298,10 +298,10 @@ fn remote_schema_apply_via_cli_updates_repo() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_schema_apply_rejects_unsupported_plan() { - let repo = SystemRepo::initialized(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let breaking_schema = repo.write_file( + let graph = SystemGraph::initialized(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let breaking_schema = graph.write_file( "breaking.pg", &fs::read_to_string(fixture("test.pg")) .unwrap() @@ -324,7 +324,7 @@ fn remote_schema_apply_rejects_unsupported_plan() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_schema_apply_rejects_when_non_main_branch_exists() { - let repo = SystemRepo::initialized(); + let graph = SystemGraph::initialized(); output_success( cli() .arg("branch") @@ -332,12 +332,12 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() { .arg("--from") .arg("main") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("feature"), ); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let 
next_schema = repo.write_file( + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let next_schema = graph.write_file( "next.pg", &fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -355,16 +355,16 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() { .arg(&next_schema), ); let stderr = String::from_utf8_lossy(&output.stderr); - assert!(stderr.contains("schema apply requires a repo with only main")); + assert!(stderr.contains("schema apply requires a graph with only main")); } #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_read_preserves_projection_order_in_json_and_csv() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let ordered_query = repo.write_query( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let ordered_query = graph.write_query( "ordered-remote.gq", r#" query ordered_person($name: String) { @@ -419,10 +419,10 @@ query ordered_person($name: String) { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_branch_create_list_merge_flow() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let mutation_file = repo.write_query( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let mutation_file = graph.write_query( "system-remote-branch-change.gq", r#" query insert_person($name: String, $age: I32) { @@ -516,9 +516,9 @@ query insert_person($name: String, $age: I32) { #[test] #[ignore = "requires loopback 
socket permissions in sandboxed runners"] fn remote_branch_delete_removes_branch() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); parse_stdout_json(&output_success( cli() @@ -557,10 +557,10 @@ fn remote_branch_delete_removes_branch() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_export_round_trips_full_branch_graph() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let mutation_file = repo.write_query( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let mutation_file = graph.write_query( "system-remote-export-change.gq", r#" query insert_person($name: String, $age: I32) { @@ -624,8 +624,8 @@ query add_friend($from: String, $to: String) { .arg("feature") .arg("--jsonl"), )); - let export_path = repo.write_jsonl("system-remote-exported.jsonl", &exported); - let imported_repo = repo + let export_path = graph.write_jsonl("system-remote-exported.jsonl", &exported); + let imported_graph = graph .path() .parent() .unwrap() @@ -636,18 +636,18 @@ query add_friend($from: String, $to: String) { .arg("init") .arg("--schema") .arg(fixture("test.pg")) - .arg(&imported_repo), + .arg(&imported_graph), ); output_success( cli() .arg("load") .arg("--data") .arg(&export_path) - .arg(&imported_repo), + .arg(&imported_graph), ); let snapshot = parse_stdout_json(&output_success( - cli().arg("snapshot").arg(&imported_repo).arg("--json"), + cli().arg("snapshot").arg(&imported_graph).arg("--json"), )); assert_eq!( snapshot["tables"] @@ 
-671,7 +671,7 @@ query add_friend($from: String, $to: String) { let eve = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&imported_repo) + .arg(&imported_graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -687,10 +687,10 @@ query add_friend($from: String, $to: String) { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_ingest_creates_review_branch_and_keeps_it_readable() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let ingest_data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let ingest_data = graph.write_jsonl( "system-remote-ingest.jsonl", r#"{"type":"Person","data":{"name":"Zoe","age":33}} {"type":"Person","data":{"name":"Bob","age":26}}"#, @@ -747,9 +747,9 @@ fn remote_ingest_creates_review_branch_and_keeps_it_readable() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_ingest_reuses_existing_branch_and_merges_updates() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); output_success( cli() @@ -762,7 +762,7 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() { .arg("feature-ingest"), ); - let ingest_data = repo.write_jsonl( + let ingest_data = graph.write_jsonl( "system-remote-ingest-merge.jsonl", r#"{"type":"Person","data":{"name":"Bob","age":26}} {"type":"Person","data":{"name":"Zoe","age":33}}"#, @@ -828,23 +828,23 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() { #[test] #[ignore = 
"requires loopback socket permissions in sandboxed runners"] fn remote_policy_enforces_branch_first_cli_workflow() { - let repo = SystemRepo::loaded(); + let graph = SystemGraph::loaded(); let server_config = - repo.write_config("server-policy.yaml", &remote_policy_server_config(&repo)); - repo.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML); - let server = repo.spawn_server_with_config_env( + graph.write_config("server-policy.yaml", &remote_policy_server_config(&graph)); + graph.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML); + let server = graph.spawn_server_with_config_env( &server_config, &[( "OMNIGRAPH_SERVER_BEARER_TOKENS_JSON", r#"{"act-bruno":"team-token","act-ragnor":"admin-token"}"#, )], ); - let client_config = repo.write_config( + let client_config = graph.write_config( "omnigraph-policy.yaml", &remote_policy_client_config(&server.base_url), ); - repo.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n"); - let mutation_file = repo.write_query( + graph.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n"); + let mutation_file = graph.write_query( "system-remote-policy-change.gq", r#" query insert_person($name: String, $age: I32) { diff --git a/crates/omnigraph-compiler/Cargo.toml b/crates/omnigraph-compiler/Cargo.toml index 7bb8df0..229b862 100644 --- a/crates/omnigraph-compiler/Cargo.toml +++ b/crates/omnigraph-compiler/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-compiler" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "Schema/query compiler for Omnigraph. Zero Lance dependency." 
license = "MIT" diff --git a/crates/omnigraph-compiler/src/query/lint.rs b/crates/omnigraph-compiler/src/query/lint.rs index 38ae6ee..5f56774 100644 --- a/crates/omnigraph-compiler/src/query/lint.rs +++ b/crates/omnigraph-compiler/src/query/lint.rs @@ -38,7 +38,7 @@ pub enum QueryLintQueryKind { #[serde(rename_all = "lowercase")] pub enum QueryLintSchemaSourceKind { File, - Repo, + Graph, } #[derive(Debug, Clone, PartialEq, Eq, Serialize)] @@ -59,9 +59,9 @@ impl QueryLintSchemaSource { } } - pub fn repo(uri: impl Into) -> Self { + pub fn graph(uri: impl Into) -> Self { Self { - kind: QueryLintSchemaSourceKind::Repo, + kind: QueryLintSchemaSourceKind::Graph, path: None, uri: Some(uri.into()), } diff --git a/crates/omnigraph-policy/Cargo.toml b/crates/omnigraph-policy/Cargo.toml index 3e19ce8..dacda35 100644 --- a/crates/omnigraph-policy/Cargo.toml +++ b/crates/omnigraph-policy/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-policy" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum." 
license = "MIT" diff --git a/crates/omnigraph-policy/src/lib.rs b/crates/omnigraph-policy/src/lib.rs index 41ddf82..f24124f 100644 --- a/crates/omnigraph-policy/src/lib.rs +++ b/crates/omnigraph-policy/src/lib.rs @@ -172,7 +172,7 @@ pub struct PolicyCompiler; #[derive(Clone)] pub struct PolicyEngine { - repo_id: String, + graph_id: String, protected_branches: BTreeSet, known_actors: BTreeSet, schema: Schema, @@ -291,7 +291,7 @@ impl PolicyTestConfig { } impl PolicyCompiler { - pub fn compile(config: &PolicyConfig, repo_id: &str) -> Result { + pub fn compile(config: &PolicyConfig, graph_id: &str) -> Result { config.validate()?; let (schema, schema_warnings) = Schema::from_cedarschema_str(policy_schema_source())?; let schema_warnings = schema_warnings @@ -300,8 +300,8 @@ impl PolicyCompiler { if !schema_warnings.is_empty() { bail!("policy schema warnings:\n{}", schema_warnings.join("\n")); } - let entities = compile_entities(config, repo_id, &schema)?; - let (policies, policy_to_rule) = compile_policies(config, repo_id)?; + let entities = compile_entities(config, graph_id, &schema)?; + let (policies, policy_to_rule) = compile_policies(config, graph_id)?; let validator = Validator::new(schema.clone()); let validation = validator.validate(&policies, ValidationMode::Strict); let errors = validation @@ -318,7 +318,7 @@ impl PolicyCompiler { .flat_map(|members| members.iter().cloned()) .collect(); Ok(PolicyEngine { - repo_id: repo_id.to_string(), + graph_id: graph_id.to_string(), protected_branches: config.protected_branches.iter().cloned().collect(), known_actors, schema, @@ -330,9 +330,9 @@ impl PolicyCompiler { } impl PolicyEngine { - pub fn load(path: &Path, repo_id: &str) -> Result { + pub fn load(path: &Path, graph_id: &str) -> Result { let config = PolicyConfig::load(path)?; - PolicyCompiler::compile(&config, repo_id) + PolicyCompiler::compile(&config, graph_id) } pub fn authorize(&self, request: &PolicyRequest) -> Result { @@ -349,7 +349,7 @@ impl PolicyEngine { 
let principal = entity_uid("Actor", &request.actor_id)?; let action = entity_uid("Action", request.action.as_str())?; - let resource = entity_uid("Repo", &self.repo_id)?; + let resource = entity_uid("Graph", &self.graph_id)?; let context_value = json!({ "has_branch": request.branch.is_some(), "branch": request.branch.clone().unwrap_or_default(), @@ -462,7 +462,7 @@ impl PolicyEngine { } } -fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Result { +fn compile_entities(config: &PolicyConfig, graph_id: &str, schema: &Schema) -> Result { let mut group_entities = Vec::new(); for group in config.groups.keys() { group_entities.push(Entity::new( @@ -495,8 +495,8 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re )?); } - let repo_entity = Entity::new( - entity_uid("Repo", repo_id)?, + let graph_entity = Entity::new( + entity_uid("Graph", graph_id)?, HashMap::new(), HashSet::::new(), )?; @@ -504,13 +504,13 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re let mut entities = Vec::new(); entities.extend(group_entities); entities.extend(actor_entities); - entities.push(repo_entity); + entities.push(graph_entity); Ok(Entities::from_entities(entities, Some(schema))?) 
} fn compile_policies( config: &PolicyConfig, - repo_id: &str, + graph_id: &str, ) -> Result<(PolicySet, HashMap)> { let mut policies = Vec::new(); let mut policy_to_rule = HashMap::new(); @@ -518,7 +518,7 @@ fn compile_policies( for rule in &config.rules { for action in &rule.allow.actions { let policy_id = PolicyId::new(format!("{}:{}", rule.id, action.as_str())); - let source = compile_policy_source(rule, action, repo_id); + let source = compile_policy_source(rule, action, graph_id); let policy = Policy::parse(Some(policy_id.clone()), source.as_str())?; policy_to_rule.insert(policy_id.to_string(), rule.id.clone()); policies.push(policy); @@ -528,7 +528,7 @@ fn compile_policies( Ok((PolicySet::from_policies(policies)?, policy_to_rule)) } -fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str) -> String { +fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, graph_id: &str) -> String { let mut conditions = Vec::new(); if let Some(scope) = rule.allow.branch_scope { conditions.push(branch_scope_condition(scope)); @@ -547,11 +547,11 @@ fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str r#"permit ( principal in Omnigraph::Group::{group}, action == Omnigraph::Action::{action}, - resource == Omnigraph::Repo::{repo} + resource == Omnigraph::Graph::{graph} ){when};"#, group = cedar_literal(&rule.allow.actors.group), action = cedar_literal(action.as_str()), - repo = cedar_literal(repo_id), + graph = cedar_literal(graph_id), when = when, ) } @@ -594,16 +594,16 @@ namespace Omnigraph { entity Actor in [Group]; entity Group; - entity Repo; + entity Graph; - action "read" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "export" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "change" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "schema_apply" appliesTo { principal: Actor, resource: Repo, context: 
RequestContext }; - action "branch_create" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "branch_delete" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "branch_merge" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "admin" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; + action "read" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "export" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "change" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "schema_apply" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "branch_create" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "branch_delete" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "branch_merge" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "admin" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; } "# } @@ -881,7 +881,7 @@ rules: ) .unwrap(); - let engine = PolicyCompiler::compile(&policy, "repo").unwrap(); + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); let allow = engine .authorize(&PolicyRequest { actor_id: "act-bruno".to_string(), @@ -932,7 +932,7 @@ rules: "#, ) .unwrap(); - let engine = PolicyCompiler::compile(&policy, "repo").unwrap(); + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); let tests = PolicyTestConfig { version: 1, cases: vec![ @@ -976,7 +976,7 @@ rules: ) .unwrap(); - let engine = PolicyCompiler::compile(&policy, "repo").unwrap(); + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); let allow = engine .authorize(&PolicyRequest { actor_id: "act-ragnor".to_string(), diff --git a/crates/omnigraph-server/Cargo.toml b/crates/omnigraph-server/Cargo.toml 
index 2c89ed4..0372adc 100644 --- a/crates/omnigraph-server/Cargo.toml +++ b/crates/omnigraph-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-server" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "HTTP server for the Omnigraph graph database." license = "MIT" @@ -19,9 +19,9 @@ default = [] aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"] [dependencies] -omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" } -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } -omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" } +omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" } axum = { workspace = true } clap = { workspace = true } color-eyre = { workspace = true } @@ -45,4 +45,5 @@ aws-sdk-secretsmanager = { version = "1", optional = true, default-features = fa tempfile = { workspace = true } tower = { workspace = true } serial_test = "3" +lance = { workspace = true } lance-index = { workspace = true } diff --git a/crates/omnigraph-server/examples/bench_actor_isolation.rs b/crates/omnigraph-server/examples/bench_actor_isolation.rs index 4e3299c..5a708e0 100644 --- a/crates/omnigraph-server/examples/bench_actor_isolation.rs +++ b/crates/omnigraph-server/examples/bench_actor_isolation.rs @@ -259,10 +259,10 @@ async fn main() { } let temp = tempfile::tempdir().expect("tempdir"); - let repo = temp.path().join("bench.omni"); - Omnigraph::init(repo.to_str().unwrap(), SCHEMA) + let graph = temp.path().join("bench.omni"); + Omnigraph::init(graph.to_str().unwrap(), SCHEMA) .await - .expect("init repo"); + .expect("init graph"); // Build bearer tokens: one for the heavy actor + one per light actor. 
let mut tokens: Vec<(String, String)> = @@ -270,21 +270,17 @@ async fn main() { for i in 0..args.light_actors { tokens.push((format!("act-light-{i}"), format!("light-token-{i}"))); } - let db = Omnigraph::open(repo.to_str().unwrap()) + let db = Omnigraph::open(graph.to_str().unwrap()) .await - .expect("open repo"); + .expect("open graph"); // Construct a custom WorkloadController with the requested caps and // pass it through `AppState::new_with_workload`. Avoids the // `unsafe { std::env::set_var(...) }` antipattern that violates // `setenv`'s thread-safety precondition once the multi-thread tokio // runtime is up. let workload = WorkloadController::new(args.inflight_cap, args.byte_cap); - let state = AppState::new_with_workload( - repo.to_string_lossy().to_string(), - db, - tokens, - workload, - ); + let state = + AppState::new_with_workload(graph.to_string_lossy().to_string(), db, tokens, workload); let app = build_app(state); eprintln!( diff --git a/crates/omnigraph-server/examples/bench_concurrent_http.rs b/crates/omnigraph-server/examples/bench_concurrent_http.rs index c114dfb..6a8411a 100644 --- a/crates/omnigraph-server/examples/bench_concurrent_http.rs +++ b/crates/omnigraph-server/examples/bench_concurrent_http.rs @@ -152,7 +152,9 @@ async fn drive_actor( errors += 1; // Drain body for logging on the first few failures. 
if errors <= 3 { - let body = to_bytes(response.into_body(), 64 * 1024).await.unwrap_or_default(); + let body = to_bytes(response.into_body(), 64 * 1024) + .await + .unwrap_or_default(); eprintln!( "actor {actor_idx} op {op_idx} status {status} body {}", String::from_utf8_lossy(&body) @@ -173,13 +175,13 @@ async fn main() { } let temp = tempfile::tempdir().expect("tempdir"); - let repo = temp.path().join("bench.omni"); + let graph = temp.path().join("bench.omni"); let schema = build_schema(args.tables); - Omnigraph::init(repo.to_str().unwrap(), &schema) + Omnigraph::init(graph.to_str().unwrap(), &schema) .await - .expect("init repo"); + .expect("init graph"); - let state = AppState::open(repo.to_string_lossy().to_string()) + let state = AppState::open(graph.to_string_lossy().to_string()) .await .expect("open AppState"); let app = build_app(state); diff --git a/crates/omnigraph-server/src/lib.rs b/crates/omnigraph-server/src/lib.rs index 618677f..c06c1ed 100644 --- a/crates/omnigraph-server/src/lib.rs +++ b/crates/omnigraph-server/src/lib.rs @@ -18,6 +18,7 @@ use api::{ IngestRequest, QueryRequest, ReadOutput, ReadRequest, SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotQuery, ingest_output, schema_apply_output, snapshot_payload, }; +pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source}; use axum::body::{Body, Bytes}; use axum::extract::DefaultBodyLimit; use axum::extract::{Extension, Path, Query, Request, State}; @@ -39,7 +40,6 @@ use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError}; use omnigraph_compiler::json_params_to_param_map; use omnigraph_compiler::query::parser::parse_query; use omnigraph_compiler::{JsonParamMode, ParamMap}; -pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source}; pub use policy::{ PolicyAction, PolicyCompiler, PolicyConfig, PolicyDecision, PolicyEngine, PolicyExpectation, PolicyRequest, PolicyTestConfig, @@ -439,10 +439,7 @@ impl 
ApiError { } } - fn manifest_version_conflict( - message: String, - details: api::ManifestConflictOutput, - ) -> Self { + fn manifest_version_conflict(message: String, details: api::ManifestConflictOutput) -> Self { Self { status: StatusCode::CONFLICT, code: ErrorCode::Conflict, @@ -2112,12 +2109,12 @@ server: ("OMNIGRAPH_UNAUTHENTICATED", None), ]); let temp = tempdir().unwrap(); - // Repo path doesn't need to exist — classifier fires before + // Graph path doesn't need to exist — classifier fires before // `AppState::open_with_bearer_tokens_and_policy`. let config = ServerConfig { uri: temp .path() - .join("repo.omni") + .join("graph.omni") .to_string_lossy() .into_owned(), bind: "127.0.0.1:0".to_string(), @@ -2125,7 +2122,8 @@ server: allow_unauthenticated: false, }; let result = serve(config).await; - let err = result.expect_err("serve should refuse to start in State 1 without --unauthenticated"); + let err = + result.expect_err("serve should refuse to start in State 1 without --unauthenticated"); let msg = format!("{:?}", err); assert!( msg.contains("no bearer tokens") || msg.contains("policy file"), diff --git a/crates/omnigraph-server/src/main.rs b/crates/omnigraph-server/src/main.rs index 54af1ed..4e1c256 100644 --- a/crates/omnigraph-server/src/main.rs +++ b/crates/omnigraph-server/src/main.rs @@ -8,7 +8,7 @@ use omnigraph_server::{ServerConfig, init_tracing, load_server_settings, serve}; #[command(name = "omnigraph-server")] #[command(about = "HTTP server for the Omnigraph graph database")] struct Cli { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, diff --git a/crates/omnigraph-server/tests/openapi.rs b/crates/omnigraph-server/tests/openapi.rs index 70824fb..a9d04e0 100644 --- a/crates/omnigraph-server/tests/openapi.rs +++ b/crates/omnigraph-server/tests/openapi.rs @@ -19,42 +19,42 @@ fn fixture(name: &str) -> PathBuf { .join(name) } -fn repo_path(root: &Path) -> PathBuf { +fn graph_path(root: &Path) -> PathBuf { 
root.join("openapi_test.omni") } -async fn init_loaded_repo() -> tempfile::TempDir { +async fn init_loaded_graph() -> tempfile::TempDir { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); - fs::create_dir_all(&repo).unwrap(); + let graph = graph_path(temp.path()); + fs::create_dir_all(&graph).unwrap(); let schema = fs::read_to_string(fixture("test.pg")).unwrap(); let data = fs::read_to_string(fixture("test.jsonl")).unwrap(); - Omnigraph::init(repo.to_str().unwrap(), &schema) + Omnigraph::init(graph.to_str().unwrap(), &schema) .await .unwrap(); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); load_jsonl(&mut db, &data, LoadMode::Overwrite) .await .unwrap(); temp } -async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) +async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); (temp, app) } -async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); +async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let state = AppState::new_with_bearer_token( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), db, Some(token.to_string()), ); @@ -84,7 +84,7 @@ fn openapi_json() -> Value { #[tokio::test] async fn 
openapi_endpoint_returns_200_with_valid_json() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -97,7 +97,7 @@ async fn openapi_endpoint_returns_200_with_valid_json() { #[tokio::test] async fn openapi_endpoint_returns_openapi_31_version() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -113,11 +113,11 @@ async fn openapi_endpoint_returns_openapi_31_version() { #[tokio::test] async fn openapi_endpoint_does_not_require_auth() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let state = AppState::new_with_bearer_token( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), db, Some("secret-token".to_string()), ); @@ -129,7 +129,11 @@ async fn openapi_endpoint_does_not_require_auth() { .body(Body::empty()) .unwrap(); let (status, _) = json_response(&app, request).await; - assert_eq!(status, StatusCode::OK, "/openapi.json should not require auth"); + assert_eq!( + status, + StatusCode::OK, + "/openapi.json should not require auth" + ); } // --------------------------------------------------------------------------- @@ -739,10 +743,13 @@ fn branch_delete_has_branch_path_parameter() { let params = doc["paths"]["/branches/{branch}"]["delete"]["parameters"] .as_array() .unwrap(); - let has_branch = params.iter().any(|p| { - p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path") - }); - assert!(has_branch, "DELETE /branches/{{branch}} must have 'branch' path parameter"); + let has_branch = params + .iter() + .any(|p| p["name"].as_str() 
== Some("branch") && p["in"].as_str() == Some("path")); + assert!( + has_branch, + "DELETE /branches/{{branch}} must have 'branch' path parameter" + ); } #[test] @@ -751,10 +758,13 @@ fn commit_show_has_commit_id_path_parameter() { let params = doc["paths"]["/commits/{commit_id}"]["get"]["parameters"] .as_array() .unwrap(); - let has_commit_id = params.iter().any(|p| { - p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path") - }); - assert!(has_commit_id, "GET /commits/{{commit_id}} must have 'commit_id' path parameter"); + let has_commit_id = params + .iter() + .any(|p| p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path")); + assert!( + has_commit_id, + "GET /commits/{{commit_id}} must have 'commit_id' path parameter" + ); } #[test] @@ -763,10 +773,13 @@ fn snapshot_has_branch_query_parameter() { let params = doc["paths"]["/snapshot"]["get"]["parameters"] .as_array() .unwrap(); - let has_branch = params.iter().any(|p| { - p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query") - }); - assert!(has_branch, "GET /snapshot must have 'branch' query parameter"); + let has_branch = params + .iter() + .any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")); + assert!( + has_branch, + "GET /snapshot must have 'branch' query parameter" + ); } #[test] @@ -775,10 +788,13 @@ fn commits_has_branch_query_parameter() { let params = doc["paths"]["/commits"]["get"]["parameters"] .as_array() .unwrap(); - let has_branch = params.iter().any(|p| { - p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query") - }); - assert!(has_branch, "GET /commits must have 'branch' query parameter"); + let has_branch = params + .iter() + .any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")); + assert!( + has_branch, + "GET /commits must have 'branch' query parameter" + ); } // --------------------------------------------------------------------------- @@ -854,8 +870,7 
@@ fn error_responses_reference_error_output_schema() { ]; for (path, method, status) in paths_with_errors { - let content = - &doc["paths"][path][method]["responses"][status]["content"]; + let content = &doc["paths"][path][method]["responses"][status]["content"]; let schema = &content["application/json"]["schema"]; let ref_path = schema["$ref"].as_str().unwrap(); assert!( @@ -917,7 +932,7 @@ fn openapi_spec_round_trips_through_json() { #[tokio::test] async fn open_mode_spec_has_no_security_schemes() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -933,7 +948,7 @@ async fn open_mode_spec_has_no_security_schemes() { #[tokio::test] async fn open_mode_spec_has_no_operation_security() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -954,7 +969,7 @@ async fn open_mode_spec_has_no_operation_security() { #[tokio::test] async fn auth_mode_spec_includes_bearer_token_security_scheme() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -968,7 +983,7 @@ async fn auth_mode_spec_includes_bearer_token_security_scheme() { #[tokio::test] async fn auth_mode_spec_has_security_on_protected_operations() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -999,7 +1014,7 @@ async fn auth_mode_spec_has_security_on_protected_operations() { #[tokio::test] async fn auth_mode_spec_matches_static_generation() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = 
app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -1015,7 +1030,7 @@ async fn auth_mode_spec_matches_static_generation() { #[tokio::test] async fn auth_mode_healthz_still_has_no_security() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -1031,8 +1046,7 @@ async fn auth_mode_healthz_still_has_no_security() { #[test] fn openapi_spec_is_up_to_date() { - let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../../openapi.json"); + let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../openapi.json"); let generated = serde_json::to_string_pretty(&openapi_doc()).unwrap() + "\n"; diff --git a/crates/omnigraph-server/tests/server.rs b/crates/omnigraph-server/tests/server.rs index 0b25840..0ee7307 100644 --- a/crates/omnigraph-server/tests/server.rs +++ b/crates/omnigraph-server/tests/server.rs @@ -7,7 +7,7 @@ use axum::Router; use axum::body::{Body, to_bytes}; use axum::http::header::AUTHORIZATION; use axum::http::{Method, Request, StatusCode}; -use lance_index::traits::DatasetIndexExt; +use lance::index::DatasetIndexExt; use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions}; use omnigraph::error::OmniError; use omnigraph::loader::{LoadMode, load_jsonl}; @@ -105,39 +105,39 @@ fn fixture(name: &str) -> PathBuf { .join(name) } -async fn init_loaded_repo() -> tempfile::TempDir { - init_repo_with_schema_and_data( +async fn init_loaded_graph() -> tempfile::TempDir { + init_graph_with_schema_and_data( &fs::read_to_string(fixture("test.pg")).unwrap(), &fs::read_to_string(fixture("test.jsonl")).unwrap(), ) .await } -async fn init_repo_with_schema_and_data(schema: &str, data: &str) -> tempfile::TempDir { +async fn init_graph_with_schema_and_data(schema: &str, data: &str) -> tempfile::TempDir { let temp 
= tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); - fs::create_dir_all(&repo).unwrap(); - Omnigraph::init(repo.to_str().unwrap(), schema) + let graph = graph_path(temp.path()); + fs::create_dir_all(&graph).unwrap(); + Omnigraph::init(graph.to_str().unwrap(), schema) .await .unwrap(); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); load_jsonl(&mut db, data, LoadMode::Overwrite) .await .unwrap(); temp } -async fn init_repo_with_schema(schema: &str) -> tempfile::TempDir { +async fn init_graph_with_schema(schema: &str) -> tempfile::TempDir { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); - fs::create_dir_all(&repo).unwrap(); - Omnigraph::init(repo.to_str().unwrap(), schema) + let graph = graph_path(temp.path()); + fs::create_dir_all(&graph).unwrap(); + Omnigraph::init(graph.to_str().unwrap(), schema) .await .unwrap(); temp } -fn repo_path(root: &Path) -> PathBuf { +fn graph_path(root: &Path) -> PathBuf { root.join("server.omni") } @@ -147,8 +147,8 @@ fn drifted_test_schema() -> String { .replace("age: I32?", "age: I64?") } -async fn manifest_dataset_version(repo: &Path) -> u64 { - Omnigraph::open(repo.to_string_lossy().as_ref()) +async fn manifest_dataset_version(graph: &Path) -> u64 { + Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap() .snapshot_of(ReadTarget::branch("main")) @@ -157,7 +157,7 @@ async fn manifest_dataset_version(repo: &Path) -> u64 { .version() } -fn s3_test_repo_uri(suite: &str) -> Option { +fn s3_test_graph_uri(suite: &str) -> Option { let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?; let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX") .ok() @@ -170,10 +170,10 @@ fn s3_test_repo_uri(suite: &str) -> Option { Some(format!("s3://{}/{}/{}/{}", bucket, prefix, suite, unique)) } -async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo 
= repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) +async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); (temp, build_app(state)) @@ -186,7 +186,7 @@ async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) { /// so test cases retain their pre-MR-723 semantics ("auth required, /// every action permitted") without conflicting with the new state /// matrix. Tests that specifically need the State-2 deny path use -/// `app_for_repo_with_auth_tokens_only` instead. +/// `app_for_graph_with_auth_tokens_only` instead. fn permit_all_policy_yaml(actors: &[&str]) -> String { let members = actors .iter() @@ -214,15 +214,15 @@ rules: ) } -async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) { +async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) { // `AppState::new_with_bearer_token(token)` maps the token to actor "default"; // permit-all policy needs to include that actor. 
- let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("default".to_string(), token.to_string())], Some(&policy_path), ) @@ -231,16 +231,16 @@ async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Route (temp, build_app(state)) } -async fn app_for_loaded_repo_with_auth_tokens( +async fn app_for_loaded_graph_with_auth_tokens( tokens: &[(&str, &str)], ) -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); let actors: Vec<&str> = tokens.iter().map(|(actor, _)| *actor).collect(); fs::write(&policy_path, permit_all_policy_yaml(&actors)).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), tokens .iter() .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) @@ -252,16 +252,16 @@ async fn app_for_loaded_repo_with_auth_tokens( (temp, build_app(state)) } -async fn app_for_loaded_repo_with_auth_tokens_and_policy( +async fn app_for_loaded_graph_with_auth_tokens_and_policy( tokens: &[(&str, &str)], policy: &str, ) -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, policy).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + 
graph.to_string_lossy().to_string(), tokens .iter() .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) @@ -273,17 +273,17 @@ async fn app_for_loaded_repo_with_auth_tokens_and_policy( (temp, build_app(state)) } -async fn app_for_repo_with_auth_tokens_and_policy( +async fn app_for_graph_with_auth_tokens_and_policy( schema: &str, tokens: &[(&str, &str)], policy: &str, ) -> (tempfile::TempDir, Router) { - let temp = init_repo_with_schema(schema).await; - let repo = repo_path(temp.path()); + let temp = init_graph_with_schema(schema).await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, policy).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), tokens .iter() .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) @@ -299,14 +299,14 @@ async fn app_for_repo_with_auth_tokens_and_policy( /// Exercises ServerRuntimeState::DefaultDeny — authenticated requests /// for Read succeed, every other action is rejected with 403 from /// `authorize_request`'s state-2 branch. 
-async fn app_for_repo_with_auth_tokens_only( +async fn app_for_graph_with_auth_tokens_only( schema: &str, tokens: &[(&str, &str)], ) -> (tempfile::TempDir, Router) { - let temp = init_repo_with_schema(schema).await; - let repo = repo_path(temp.path()); + let temp = init_graph_with_schema(schema).await; + let graph = graph_path(temp.path()); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), tokens .iter() .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) @@ -388,8 +388,8 @@ async fn json_response(app: &Router, request: Request) -> (StatusCode, Val } #[tokio::test] -async fn schema_apply_route_updates_repo_for_authorized_admin() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( +async fn schema_apply_route_updates_graph_for_authorized_admin() { + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -414,8 +414,8 @@ async fn schema_apply_route_updates_repo_for_authorized_admin() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let repo = repo_path(temp.path()); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let graph = graph_path(temp.path()); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); assert!( reopened.catalog().node_types["Person"] .properties @@ -425,7 +425,7 @@ async fn schema_apply_route_updates_repo_for_authorized_admin() { #[tokio::test] async fn schema_apply_route_requires_schema_apply_policy_permission() { - let (_temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], POLICY_YAML, @@ -456,7 +456,7 @@ async fn schema_apply_route_requires_schema_apply_policy_permission() { #[tokio::test] async 
fn schema_apply_route_requires_bearer_token_when_policy_enabled() { - let (_temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -486,7 +486,7 @@ async fn schema_apply_route_requires_bearer_token_when_policy_enabled() { #[tokio::test] async fn schema_apply_route_can_rename_type() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -510,8 +510,8 @@ async fn schema_apply_route_can_rename_type() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let repo = repo_path(temp.path()); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let graph = graph_path(temp.path()); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = reopened .snapshot_of(ReadTarget::branch("main")) .await @@ -522,7 +522,7 @@ async fn schema_apply_route_can_rename_type() { #[tokio::test] async fn schema_apply_route_can_rename_property() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -546,8 +546,8 @@ async fn schema_apply_route_can_rename_property() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let repo = repo_path(temp.path()); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let graph = graph_path(temp.path()); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let person = &reopened.catalog().node_types["Person"]; assert!(person.properties.contains_key("years")); 
assert!(!person.properties.contains_key("age")); @@ -555,15 +555,15 @@ async fn schema_apply_route_can_rename_property() { #[tokio::test] async fn schema_apply_route_can_add_index() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let before_index_count = { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); let dataset = snapshot.open("node:Person").await.unwrap(); dataset.load_indices().await.unwrap().len() @@ -586,7 +586,7 @@ async fn schema_apply_route_can_add_index() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = reopened .snapshot_of(ReadTarget::branch("main")) .await @@ -598,7 +598,7 @@ async fn schema_apply_route_can_add_index() { #[tokio::test] async fn schema_apply_route_rejects_unsupported_plan() { - let (_temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -629,16 +629,16 @@ async fn schema_apply_route_rejects_unsupported_plan() { #[tokio::test] async fn schema_apply_route_rejects_when_non_main_branch_exists() { - let temp = init_repo_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = 
init_graph_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create("feature").await.unwrap(); drop(db); let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, SCHEMA_APPLY_POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-ragnor".to_string(), "admin-token".to_string())], Some(&policy_path), ) @@ -754,7 +754,7 @@ fn mock_embedding(input: &str, dim: usize) -> Vec { #[tokio::test(flavor = "multi_thread")] async fn healthz_succeeds_after_startup() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let (status, body) = json_response( &app, Request::builder() @@ -776,9 +776,9 @@ async fn healthz_succeeds_after_startup() { #[tokio::test(flavor = "multi_thread")] async fn schema_drift_returns_conflict_for_snapshot_read_and_change() { - let (temp, app) = app_for_loaded_repo().await; - let repo = repo_path(temp.path()); - fs::write(repo.join("_schema.pg"), drifted_test_schema()).unwrap(); + let (temp, app) = app_for_loaded_graph().await; + let graph = graph_path(temp.path()); + fs::write(graph.join("_schema.pg"), drifted_test_schema()).unwrap(); let (snapshot_status, snapshot_body) = json_response( &app, @@ -861,7 +861,7 @@ async fn schema_drift_returns_conflict_for_snapshot_read_and_change() { #[tokio::test(flavor = "multi_thread")] async fn protected_routes_require_bearer_token() { - let (_temp, app) = app_for_loaded_repo_with_auth("demo-token").await; + let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await; let (status, body) = json_response( &app, Request::builder() @@ -882,7 +882,7 @@ async fn protected_routes_require_bearer_token() { #[tokio::test(flavor = "multi_thread")] async fn 
protected_routes_accept_valid_bearer_token_while_healthz_stays_open() { - let (_temp, app) = app_for_loaded_repo_with_auth("demo-token").await; + let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await; let health = app .clone() @@ -915,9 +915,9 @@ async fn protected_routes_accept_valid_bearer_token_while_healthz_stays_open() { #[tokio::test(flavor = "multi_thread")] async fn export_route_returns_jsonl_for_branch_snapshot() { let token = "demo-token"; - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -942,7 +942,7 @@ async fn export_route_returns_jsonl_for_branch_snapshot() { let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("default".to_string(), token.to_string())], Some(&policy_path), ) @@ -983,9 +983,11 @@ async fn export_route_returns_jsonl_for_branch_snapshot() { #[tokio::test(flavor = "multi_thread")] async fn protected_routes_accept_any_configured_team_bearer_token() { - let (_temp, app) = - app_for_loaded_repo_with_auth_tokens(&[("team-01", "token-one"), ("team-02", "token-two")]) - .await; + let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[ + ("team-01", "token-one"), + ("team-02", "token-two"), + ]) + .await; let (status, body) = json_response( &app, @@ -1009,8 +1011,8 @@ async fn protected_routes_accept_any_configured_team_bearer_token() { /// the policy outcome. 
#[tokio::test(flavor = "multi_thread")] async fn bearer_token_resolves_to_correct_actor_for_policy_decisions() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); fs::write( &policy_path, @@ -1030,7 +1032,7 @@ rules: ) .unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![ ("act-a".to_string(), "token-a".to_string()), ("act-b".to_string(), "token-b".to_string()), @@ -1110,8 +1112,8 @@ rules: /// → actor identity contract. #[tokio::test(flavor = "multi_thread")] async fn actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); // Same readers/writers split as // `bearer_token_resolves_to_correct_actor_for_policy_decisions` — @@ -1135,7 +1137,7 @@ rules: ) .unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![ ("act-a".to_string(), "token-a".to_string()), ("act-b".to_string(), "token-b".to_string()), @@ -1215,7 +1217,7 @@ rules: #[tokio::test(flavor = "multi_thread")] async fn policy_allows_read_but_distinguishes_401_from_403() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-bruno", "team-token"), ("act-ragnor", "admin-token")], POLICY_YAML, ) @@ -1291,16 +1293,16 @@ async fn policy_allows_read_but_distinguishes_401_from_403() { #[tokio::test(flavor = "multi_thread")] async fn policy_uses_resolved_branch_for_snapshot_reads() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + 
let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let snapshot_id = { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.resolve_snapshot("main").await.unwrap().to_string() }; let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_PROTECTED_READ_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1338,9 +1340,9 @@ async fn policy_uses_resolved_branch_for_snapshot_reads() { #[tokio::test(flavor = "multi_thread")] async fn snapshot_route_returns_manifest_dataset_version() { - let (temp, app) = app_for_loaded_repo().await; - let repo = repo_path(temp.path()); - let expected_manifest_version = manifest_dataset_version(&repo).await; + let (temp, app) = app_for_loaded_graph().await; + let graph = graph_path(temp.path()); + let expected_manifest_version = manifest_dataset_version(&graph).await; let (snapshot_status, snapshot_body) = json_response( &app, @@ -1363,7 +1365,7 @@ async fn snapshot_route_returns_manifest_dataset_version() { #[tokio::test(flavor = "multi_thread")] async fn schema_route_returns_current_source() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let (status, body) = json_response( &app, Request::builder() @@ -1381,7 +1383,7 @@ async fn schema_route_returns_current_source() { #[tokio::test(flavor = "multi_thread")] async fn schema_route_requires_bearer_token_when_auth_configured() { - let (_temp, app) = app_for_loaded_repo_with_auth("demo-token").await; + let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await; let (missing_status, missing_body) = json_response( &app, @@ -1416,13 +1418,13 @@ async fn schema_route_requires_bearer_token_when_auth_configured() { 
#[tokio::test(flavor = "multi_thread")] async fn schema_route_denied_when_actor_lacks_read_permission() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); // Policy grants branch_create only — no read action for act-bruno. fs::write(&policy_path, INGEST_CREATE_ONLY_POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1450,9 +1452,9 @@ async fn schema_route_denied_when_actor_lacks_read_permission() { #[tokio::test(flavor = "multi_thread")] async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1461,7 +1463,7 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1517,9 +1519,9 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() #[tokio::test(flavor = "multi_thread")] async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = 
Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1535,7 +1537,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() { let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![ ("act-bruno".to_string(), "team-token".to_string()), ("act-ragnor".to_string(), "admin-token".to_string()), @@ -1587,7 +1589,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() { async fn authenticated_change_stamps_actor_on_commits() { // With the Run state machine removed, actor_id is recorded // directly on the commit graph (no intermediate run record). - let (_temp, app) = app_for_loaded_repo_with_auth_tokens(&[("act-andrew", "token-one")]).await; + let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await; let change = ChangeRequest { query: MUTATION_QUERIES.to_string(), @@ -1630,8 +1632,8 @@ async fn authenticated_change_stamps_actor_on_commits() { #[tokio::test(flavor = "multi_thread")] async fn ingest_creates_branch_returns_metadata_and_stamps_actor() { - let (temp, app) = app_for_loaded_repo_with_auth_tokens(&[("act-andrew", "token-one")]).await; - let repo = repo_path(temp.path()); + let (temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await; + let graph = graph_path(temp.path()); let ingest = IngestRequest { branch: Some("feature-ingest".to_string()), from: Some("main".to_string()), @@ -1661,7 +1663,7 @@ async fn ingest_creates_branch_returns_metadata_and_stamps_actor() { assert_eq!(body["tables"][0]["table_key"], "node:Person"); 
assert_eq!(body["tables"][0]["rows_loaded"], 2); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = db .snapshot_of(ReadTarget::branch("feature-ingest")) .await @@ -1680,10 +1682,10 @@ async fn ingest_creates_branch_returns_metadata_and_stamps_actor() { #[tokio::test(flavor = "multi_thread")] async fn ingest_existing_branch_skips_branch_create_policy_check() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); { - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1691,7 +1693,7 @@ async fn ingest_existing_branch_skips_branch_create_policy_check() { let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1724,7 +1726,7 @@ async fn ingest_existing_branch_skips_branch_create_policy_check() { #[tokio::test(flavor = "multi_thread")] async fn ingest_denies_missing_branch_without_branch_create_permission() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-bruno", "team-token")], POLICY_YAML, ) @@ -1757,7 +1759,7 @@ async fn ingest_denies_missing_branch_without_branch_create_permission() { #[tokio::test(flavor = "multi_thread")] async fn ingest_denies_when_actor_lacks_change_permission() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-bruno", 
"team-token")], INGEST_CREATE_ONLY_POLICY_YAML, ) @@ -1790,7 +1792,7 @@ async fn ingest_denies_when_actor_lacks_change_permission() { #[tokio::test(flavor = "multi_thread")] async fn ingest_rejects_payloads_over_32_mib() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let oversize = IngestRequest { branch: Some("feature".to_string()), from: Some("main".to_string()), @@ -1815,7 +1817,7 @@ async fn ingest_rejects_payloads_over_32_mib() { #[tokio::test(flavor = "multi_thread")] async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens(&[ + let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[ ("act-andrew", "token-one"), ("act-ragnor", "token-two"), ]) @@ -1896,9 +1898,9 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() { #[tokio::test(flavor = "multi_thread")] async fn branch_merge_conflict_response_includes_structured_conflicts() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1934,7 +1936,7 @@ async fn branch_merge_conflict_response_includes_structured_conflicts() { .unwrap(); drop(db); - let state = AppState::open(repo.to_string_lossy().to_string()) + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -1966,7 +1968,7 @@ async fn branch_merge_conflict_response_includes_structured_conflicts() { #[tokio::test(flavor = "multi_thread")] async fn repeated_read_after_change_sees_updated_state_from_same_app() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let change = 
ChangeRequest { query: MUTATION_QUERIES.to_string(), @@ -2011,7 +2013,7 @@ async fn repeated_read_after_change_sees_updated_state_from_same_app() { #[tokio::test(flavor = "multi_thread")] async fn query_endpoint_runs_inline_read() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let query = QueryRequest { query: fs::read_to_string(fixture("test.gq")).unwrap(), @@ -2038,7 +2040,7 @@ async fn query_endpoint_runs_inline_read() { #[tokio::test(flavor = "multi_thread")] async fn query_endpoint_rejects_mutation_with_400() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let query = QueryRequest { query: MUTATION_QUERIES.to_string(), @@ -2069,7 +2071,7 @@ async fn query_endpoint_rejects_mutation_with_400() { async fn mutate_endpoint_runs_inline_mutation() { // Canonical mutation endpoint. Pairs with `/query` on the read side. // Same wire shape as `/change`, no deprecation signal. - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = json!({ "query": MUTATION_QUERIES, @@ -2109,7 +2111,7 @@ async fn change_endpoint_emits_deprecation_headers() { // per RFC 9745 (`Deprecation: true`) + RFC 8288 (`Link: ; // rel="successor-version"`). The OpenAPI side is covered by // `openapi_change_is_deprecated` in tests/openapi.rs. - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = json!({ "query": MUTATION_QUERIES, @@ -2150,7 +2152,7 @@ async fn change_endpoint_emits_deprecation_headers() { async fn read_endpoint_emits_deprecation_headers() { // `/read` is kept indefinitely for byte-stable back-compat but flagged // at runtime per RFC 9745 + RFC 8288. Successor is `/query`. 
- let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = ReadRequest { query_source: fs::read_to_string(fixture("test.gq")).unwrap(), @@ -2193,7 +2195,7 @@ async fn query_endpoint_does_not_emit_deprecation_headers() { // Sanity check the inverse: the canonical `/query` endpoint must not // carry deprecation signaling, so SDK codegens don't propagate a // bogus `@deprecated` marker. - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = QueryRequest { query: fs::read_to_string(fixture("test.gq")).unwrap(), @@ -2227,7 +2229,7 @@ async fn change_endpoint_accepts_legacy_field_names() { // The canonical wire field names on /change are `query` and `name`, but // serde aliases keep the legacy `query_source`/`query_name` payload // shape working for clients that haven't migrated yet. Pin both shapes. - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let legacy_body = json!({ "query_source": MUTATION_QUERIES, @@ -2270,7 +2272,7 @@ async fn change_endpoint_accepts_legacy_field_names() { #[tokio::test(flavor = "multi_thread")] async fn remote_branch_list_create_merge_flow_works() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let (list_status, list_body) = json_response( &app, @@ -2397,7 +2399,7 @@ async fn remote_branch_list_create_merge_flow_works() { #[tokio::test(flavor = "multi_thread")] async fn remote_branch_delete_flow_works() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let create = BranchCreateRequest { from: Some("main".to_string()), @@ -2442,14 +2444,14 @@ async fn remote_branch_delete_flow_works() { #[tokio::test(flavor = "multi_thread")] async fn branch_delete_denies_without_policy_permission() { - let (temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (temp, app) = 
app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-andrew", "token-admin"), ("act-bruno", "token-team")], POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -2475,8 +2477,8 @@ async fn branch_delete_denies_without_policy_permission() { } #[tokio::test(flavor = "multi_thread")] -async fn server_opens_s3_repo_directly_and_serves_snapshot_and_read() { - let Some(uri) = s3_test_repo_uri("server") else { +async fn server_opens_s3_graph_directly_and_serves_snapshot_and_read() { + let Some(uri) = s3_test_graph_uri("server") else { eprintln!("skipping s3 server test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -2574,9 +2576,9 @@ query vector_search_string($q: String) { ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")), ("GEMINI_API_KEY", None), ]); - let temp = init_repo_with_schema_and_data(EMBED_SCHEMA, &data).await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_graph_with_schema_and_data(EMBED_SCHEMA, &data).await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -2610,20 +2612,20 @@ async fn change_conflict_returns_manifest_conflict_409() { // a structured `manifest_conflict` body — `table_key`, `expected`, // and `actual` — so clients can detect-and-retry without parsing // the message. - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); // Build the server first so its handle pins the pre-mutation manifest // version. Then advance the manifest from outside the server. 
The // server's next /change call will capture stale `expected_versions` // (from its still-pinned snapshot) and the publisher's CAS rejects. - let state = AppState::open(repo.to_string_lossy().to_string()) + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); { - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.mutate( "main", MUTATION_QUERIES, @@ -2693,9 +2695,9 @@ async fn change_concurrent_inserts_same_key_serialize_without_409() { // node type and asserts: every request returns 200 (no 409), // and the final row count equals the seed count + N (every // staged batch actually committed). - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -2806,9 +2808,9 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() { // Lance error variant. The drift check fires at the right architectural // layer (engine boundary, under the queue) and respects the existing // `MutationOpKind` policy. 
- let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -2847,10 +2849,7 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() { } let statuses: Vec = results.iter().map(|(s, _)| *s).collect(); - let ok_count = statuses - .iter() - .filter(|s| **s == StatusCode::OK) - .count(); + let ok_count = statuses.iter().filter(|s| **s == StatusCode::OK).count(); let conflict_count = statuses .iter() .filter(|s| **s == StatusCode::CONFLICT) @@ -2880,7 +2879,8 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() { statuses ); assert_eq!( - ok_count, 1, + ok_count, + 1, "expected exactly one update to commit and N-1 to receive 409 manifest_conflict \ (op-kind-aware drift check rejects stale-V0 staged datasets at commit_all entry). \ Got {} OK + {} 409 + {} other. \ @@ -2937,8 +2937,8 @@ mod matrix { impl Harness { pub async fn new() -> Self { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); // Build the WorkloadController explicitly with defaults rather // than letting `AppState::open` call // `WorkloadController::from_env()`. The admission-gate test @@ -2951,20 +2951,16 @@ mod matrix { // 429 instead of the expected 200. Constructing the // controller here with explicit defaults makes cells // independent of any env mutation other tests perform. 
- let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); - let workload = - omnigraph_server::workload::WorkloadController::with_defaults(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + let workload = omnigraph_server::workload::WorkloadController::with_defaults(); let state = AppState::new_with_workload( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), db, Vec::new(), workload, ); let app = build_app(state); - Self { - _temp: temp, - app, - } + Self { _temp: temp, app } } pub async fn create_branch(&self, from: &str, name: &str) { @@ -3057,12 +3053,7 @@ mod matrix { ) .await .unwrap(); - assert_eq!( - r.status(), - StatusCode::OK, - "snapshot {} failed", - branch - ); + assert_eq!(r.status(), StatusCode::OK, "snapshot {} failed", branch); let body = to_bytes(r.into_body(), usize::MAX).await.unwrap(); let v: Value = serde_json::from_slice(&body).unwrap(); v["tables"] @@ -3081,10 +3072,7 @@ mod matrix { /// just count. pub async fn person_exists(&self, branch: &str, name: &str) -> bool { let body = serde_json::to_vec(&ReadRequest { - query_source: include_str!( - "../../omnigraph/tests/fixtures/test.gq" - ) - .to_string(), + query_source: include_str!("../../omnigraph/tests/fixtures/test.gq").to_string(), query_name: Some("get_person".to_string()), params: Some(json!({ "name": name })), branch: Some(branch.to_string()), @@ -3203,12 +3191,12 @@ mod matrix { .unwrap(); let response = app .oneshot( - Request::builder() - .uri("/branches/merge") - .method(Method::POST) - .header("content-type", "application/json") - .body(Body::from(body)) - .unwrap(), + Request::builder() + .uri("/branches/merge") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(body)) + .unwrap(), ) .await .unwrap(); @@ -3239,12 +3227,12 @@ mod matrix { .unwrap(); let response = app .oneshot( - Request::builder() - .uri("/change") - .method(Method::POST) - .header("content-type", "application/json") 
- .body(Body::from(body)) - .unwrap(), + Request::builder() + .uri("/change") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(body)) + .unwrap(), ) .await .unwrap(); @@ -3272,12 +3260,12 @@ mod matrix { .unwrap(); let response = app .oneshot( - Request::builder() - .uri("/branches") - .method(Method::POST) - .header("content-type", "application/json") - .body(Body::from(body)) - .unwrap(), + Request::builder() + .uri("/branches") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(body)) + .unwrap(), ) .await .unwrap(); @@ -3299,11 +3287,11 @@ mod matrix { barrier.wait().await; let response = app .oneshot( - Request::builder() - .uri(format!("/branches/{}", name)) - .method(Method::DELETE) - .body(Body::empty()) - .unwrap(), + Request::builder() + .uri(format!("/branches/{}", name)) + .method(Method::DELETE) + .body(Body::empty()) + .unwrap(), ) .await .unwrap(); @@ -3337,14 +3325,8 @@ async fn concurrent_branch_ops_morphological_matrix() { let (sa, sb) = h .run_pair( - matrix::op_merge( - "feature-a-cella".to_string(), - "target-a-cella".to_string(), - ), - matrix::op_merge( - "feature-b-cella".to_string(), - "target-b-cella".to_string(), - ), + matrix::op_merge("feature-a-cella".to_string(), "target-a-cella".to_string()), + matrix::op_merge("feature-b-cella".to_string(), "target-b-cella".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge a", cell); @@ -3387,20 +3369,15 @@ async fn concurrent_branch_ops_morphological_matrix() { let cell = "c:merge×merge:same-source-distinct-targets"; let h = matrix::Harness::new().await; h.create_branch("main", "src-shared-cellc").await; - h.insert_person("src-shared-cellc", "Sharon-cellc", 50).await; + h.insert_person("src-shared-cellc", "Sharon-cellc", 50) + .await; h.create_branch("main", "tgt-1-cellc").await; h.create_branch("main", "tgt-2-cellc").await; let (sa, sb) = h .run_pair( - matrix::op_merge( - 
"src-shared-cellc".to_string(), - "tgt-1-cellc".to_string(), - ), - matrix::op_merge( - "src-shared-cellc".to_string(), - "tgt-2-cellc".to_string(), - ), + matrix::op_merge("src-shared-cellc".to_string(), "tgt-1-cellc".to_string()), + matrix::op_merge("src-shared-cellc".to_string(), "tgt-2-cellc".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge into tgt-1", cell); @@ -3442,7 +3419,11 @@ async fn concurrent_branch_ops_morphological_matrix() { let conflict = error .manifest_conflict .expect("merge 409 must include manifest_conflict"); - assert_eq!(conflict.table_key, "node:Person", "[{}] conflict table", cell); + assert_eq!( + conflict.table_key, "node:Person", + "[{}] conflict table", + cell + ); h.assert_persons("main", cell, &["FrankD-celld"], &["EveD-celld"]) .await; } @@ -3495,22 +3476,18 @@ async fn concurrent_branch_ops_morphological_matrix() { let (sa, sb) = h .run_pair( - matrix::op_branch_create( - "alpha-cellf".to_string(), - "gamma-cellf".to_string(), - ), - matrix::op_branch_create( - "beta-cellf".to_string(), - "delta-cellf".to_string(), - ), + matrix::op_branch_create("alpha-cellf".to_string(), "gamma-cellf".to_string()), + matrix::op_branch_create("beta-cellf".to_string(), "delta-cellf".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] gamma create", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] delta create", cell); // gamma forks off alpha → must contain Eve. - h.assert_persons("gamma-cellf", cell, &["Eve-cellf"], &[]).await; + h.assert_persons("gamma-cellf", cell, &["Eve-cellf"], &[]) + .await; // delta forks off beta → must NOT contain Eve. 
- h.assert_persons("delta-cellf", cell, &[], &["Eve-cellf"]).await; + h.assert_persons("delta-cellf", cell, &[], &["Eve-cellf"]) + .await; h.assert_post_op_sentinel(cell, "sentinel-cellf").await; } @@ -3531,7 +3508,8 @@ async fn concurrent_branch_ops_morphological_matrix() { assert_eq!(sa.status, StatusCode::OK, "[{}] create newborn", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] delete doomed", cell); // newborn-cellg exists with main's content. - h.assert_persons("newborn-cellg", cell, &["Alice"], &[]).await; + h.assert_persons("newborn-cellg", cell, &["Alice"], &[]) + .await; h.assert_post_op_sentinel(cell, "sentinel-cellg").await; } @@ -3661,14 +3639,18 @@ async fn concurrent_branch_ops_morphological_matrix() { let conflict = error .manifest_conflict .expect("merge 409 must include manifest_conflict"); - assert_eq!(conflict.table_key, "node:Person", "[{}] conflict table", cell); + assert_eq!( + conflict.table_key, "node:Person", + "[{}] conflict table", + cell + ); h.assert_persons("main", cell, &["Steve-cellk"], &["Rita-cellk"]) .await; } - // Reopen via a fresh AppState on the same repo. - let repo_uri = format!("{}/server.omni", h._temp.path().display()); - let reopened = AppState::open(repo_uri.clone()).await.unwrap(); + // Reopen via a fresh AppState on the same graph. + let graph_uri = format!("{}/server.omni", h._temp.path().display()); + let reopened = AppState::open(graph_uri.clone()).await.unwrap(); let app2 = build_app(reopened); // Sanity: the same identity check via the new app must see // Rita and Steve. 
@@ -3731,9 +3713,9 @@ query insert_c($name: String) { const SEED_COMPANIES: u64 = 2; const PER_TYPE: usize = 4; - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -3806,7 +3788,11 @@ query insert_c($name: String) { let lookup_count = |table_key: &str| -> u64 { body["tables"] .as_array() - .and_then(|tables| tables.iter().find(|t| t["table_key"].as_str() == Some(table_key))) + .and_then(|tables| { + tables + .iter() + .find(|t| t["table_key"].as_str() == Some(table_key)) + }) .and_then(|t| t["row_count"].as_u64()) .unwrap_or_else(|| panic!("snapshot missing {}", table_key)) }; @@ -3851,9 +3837,9 @@ async fn ingest_per_actor_admission_cap_returns_429() { // `AppState::new_with_workload` constructor closes that bug class — // this test no longer mutates global state and no longer needs // `#[serial]`. - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let workload = omnigraph_server::workload::WorkloadController::new( 1, // per-actor in-flight cap (the fixture under test) 1_000_000_000, // per-actor byte budget — large so it never bottlenecks @@ -3864,13 +3850,11 @@ async fn ingest_per_actor_admission_cap_returns_429() { // enough to clear the State 3 path so the test reaches workload. 
let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, permit_all_policy_yaml(&["act-flooder"])).unwrap(); - let policy_engine = omnigraph_server::PolicyEngine::load( - &policy_path, - repo.to_string_lossy().as_ref(), - ) - .unwrap(); + let policy_engine = + omnigraph_server::PolicyEngine::load(&policy_path, graph.to_string_lossy().as_ref()) + .unwrap(); let state = AppState::new_with_workload( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), db, vec![("act-flooder".to_string(), "flooder-token".to_string())], workload, @@ -3970,7 +3954,7 @@ async fn ingest_per_actor_admission_cap_returns_429() { #[tokio::test(flavor = "multi_thread")] async fn oversized_request_body_returns_payload_too_large() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let oversized = "x".repeat(1_100_000); let response = app .clone() @@ -3998,7 +3982,7 @@ async fn oversized_request_body_returns_payload_too_large() { #[tokio::test(flavor = "multi_thread")] async fn default_deny_mode_allows_read_for_authenticated_actor() { - let (_temp, app) = app_for_repo_with_auth_tokens_only( + let (_temp, app) = app_for_graph_with_auth_tokens_only( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-andrew", "demo-token")], ) @@ -4019,7 +4003,7 @@ async fn default_deny_mode_allows_read_for_authenticated_actor() { #[tokio::test(flavor = "multi_thread")] async fn default_deny_mode_rejects_change_with_forbidden() { - let (_temp, app) = app_for_repo_with_auth_tokens_only( + let (_temp, app) = app_for_graph_with_auth_tokens_only( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-andrew", "demo-token")], ) @@ -4053,7 +4037,7 @@ async fn default_deny_mode_rejects_change_with_forbidden() { #[tokio::test(flavor = "multi_thread")] async fn default_deny_mode_rejects_schema_apply_with_forbidden() { - let (_temp, app) = app_for_repo_with_auth_tokens_only( + let (_temp, app) = 
app_for_graph_with_auth_tokens_only( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-andrew", "demo-token")], ) @@ -4061,7 +4045,7 @@ async fn default_deny_mode_rejects_schema_apply_with_forbidden() { let req = SchemaApplyRequest { schema_source: additive_schema_with_nickname(), - ..Default::default() + ..Default::default() }; let (status, body) = json_response( &app, @@ -4121,13 +4105,13 @@ enum ParityDecision { Deny, } -async fn build_parity_repo() -> (tempfile::TempDir, PathBuf, PathBuf) { - // Build a repo with `main` loaded and a `feature` branch ready for - // merge. Returns the repo path and a written policy.yaml path. - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); +async fn build_parity_graph() -> (tempfile::TempDir, PathBuf, PathBuf) { + // Build a graph with `main` loaded and a `feature` branch ready for + // merge. Returns the graph path and a written policy.yaml path. + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -4142,12 +4126,12 @@ async fn build_parity_repo() -> (tempfile::TempDir, PathBuf, PathBuf) { } let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, PARITY_POLICY_YAML).unwrap(); - (temp, repo, policy_path) + (temp, graph, policy_path) } -async fn sdk_change_decision(repo: &Path, policy_path: &Path, actor: &str) -> ParityDecision { - let policy = PolicyEngine::load(policy_path, repo.to_string_lossy().as_ref()).unwrap(); - let db = Omnigraph::open(repo.to_str().unwrap()) +async fn sdk_change_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision { + let policy = PolicyEngine::load(policy_path, graph.to_string_lossy().as_ref()).unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()) .await .unwrap() 
.with_policy(Arc::new(policy) as Arc); @@ -4160,7 +4144,13 @@ async fn sdk_change_decision(repo: &Path, policy_path: &Path, actor: &str) -> Pa ); params.insert("age".to_string(), omnigraph_compiler::Literal::Integer(30)); let result = db - .mutate_as("main", MUTATION_QUERIES, "insert_person", &params, Some(actor)) + .mutate_as( + "main", + MUTATION_QUERIES, + "insert_person", + &params, + Some(actor), + ) .await; match result { Ok(_) => ParityDecision::Allow, @@ -4170,13 +4160,13 @@ async fn sdk_change_decision(repo: &Path, policy_path: &Path, actor: &str) -> Pa } async fn http_change_decision( - repo: &Path, + graph: &Path, policy_path: &PathBuf, actor: &str, token: &str, ) -> ParityDecision { let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![(actor.to_string(), token.to_string())], Some(policy_path), ) @@ -4207,9 +4197,9 @@ async fn http_change_decision( } } -async fn sdk_merge_decision(repo: &Path, policy_path: &Path, actor: &str) -> ParityDecision { - let policy = PolicyEngine::load(policy_path, repo.to_string_lossy().as_ref()).unwrap(); - let db = Omnigraph::open(repo.to_str().unwrap()) +async fn sdk_merge_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision { + let policy = PolicyEngine::load(policy_path, graph.to_string_lossy().as_ref()).unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()) .await .unwrap() .with_policy(Arc::new(policy) as Arc); @@ -4222,13 +4212,13 @@ async fn sdk_merge_decision(repo: &Path, policy_path: &Path, actor: &str) -> Par } async fn http_merge_decision( - repo: &Path, + graph: &Path, policy_path: &PathBuf, actor: &str, token: &str, ) -> ParityDecision { let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![(actor.to_string(), token.to_string())], Some(policy_path), ) @@ -4260,12 +4250,12 @@ async fn http_merge_decision( 
#[tokio::test(flavor = "multi_thread")] async fn policy_decision_parity_change_admin_on_main_allowed() { // (act-ragnor, change, main) — admins-change-anywhere rule applies. - // Both SDK and HTTP must allow. Each path uses its own fresh repo + // Both SDK and HTTP must allow. Each path uses its own fresh graph // because allow→side-effects. - let (_t1, repo1, policy1) = build_parity_repo().await; - let sdk = sdk_change_decision(&repo1, &policy1, "act-ragnor").await; - let (_t2, repo2, policy2) = build_parity_repo().await; - let http = http_change_decision(&repo2, &policy2, "act-ragnor", "ragnor-token").await; + let (_t1, graph1, policy1) = build_parity_graph().await; + let sdk = sdk_change_decision(&graph1, &policy1, "act-ragnor").await; + let (_t2, graph2, policy2) = build_parity_graph().await; + let http = http_change_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await; assert!( matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow), "SDK={sdk:?} HTTP={http:?} — should both Allow", @@ -4275,11 +4265,11 @@ async fn policy_decision_parity_change_admin_on_main_allowed() { #[tokio::test(flavor = "multi_thread")] async fn policy_decision_parity_change_team_on_main_denied() { // (act-bruno, change, main) — no rule grants bruno change on - // protected. Both SDK and HTTP must deny. Same repo is reusable + // protected. Both SDK and HTTP must deny. Same graph is reusable // because deny→no side-effects. 
- let (_temp, repo, policy) = build_parity_repo().await; - let sdk = sdk_change_decision(&repo, &policy, "act-bruno").await; - let http = http_change_decision(&repo, &policy, "act-bruno", "bruno-token").await; + let (_temp, graph, policy) = build_parity_graph().await; + let sdk = sdk_change_decision(&graph, &policy, "act-bruno").await; + let http = http_change_decision(&graph, &policy, "act-bruno", "bruno-token").await; assert!( matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny), "SDK={sdk:?} HTTP={http:?} — should both Deny", @@ -4289,12 +4279,12 @@ async fn policy_decision_parity_change_team_on_main_denied() { #[tokio::test(flavor = "multi_thread")] async fn policy_decision_parity_branch_merge_admin_allowed() { // (act-ragnor, branch_merge, feature→main) — admins-merge-to-protected - // rule applies. Both Allow. Each path uses its own fresh repo — + // rule applies. Both Allow. Each path uses its own fresh graph — // a successful merge consumes the feature branch's commit on main. - let (_t1, repo1, policy1) = build_parity_repo().await; - let sdk = sdk_merge_decision(&repo1, &policy1, "act-ragnor").await; - let (_t2, repo2, policy2) = build_parity_repo().await; - let http = http_merge_decision(&repo2, &policy2, "act-ragnor", "ragnor-token").await; + let (_t1, graph1, policy1) = build_parity_graph().await; + let sdk = sdk_merge_decision(&graph1, &policy1, "act-ragnor").await; + let (_t2, graph2, policy2) = build_parity_graph().await; + let http = http_merge_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await; assert!( matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow), "SDK={sdk:?} HTTP={http:?} — should both Allow", @@ -4305,9 +4295,9 @@ async fn policy_decision_parity_branch_merge_admin_allowed() { async fn policy_decision_parity_branch_merge_team_denied() { // (act-bruno, branch_merge, feature→main) — no rule grants bruno // branch_merge. Both Deny. 
- let (_temp, repo, policy) = build_parity_repo().await; - let sdk = sdk_merge_decision(&repo, &policy, "act-bruno").await; - let http = http_merge_decision(&repo, &policy, "act-bruno", "bruno-token").await; + let (_temp, graph, policy) = build_parity_graph().await; + let sdk = sdk_merge_decision(&graph, &policy, "act-bruno").await; + let http = http_merge_decision(&graph, &policy, "act-bruno", "bruno-token").await; assert!( matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny), "SDK={sdk:?} HTTP={http:?} — should both Deny", @@ -4324,16 +4314,16 @@ async fn policy_decision_parity_branch_merge_team_denied() { #[tokio::test(flavor = "multi_thread")] async fn schema_apply_route_soft_drops_property_via_http() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, ) .await; // Load a row that has the column we're about to drop. - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.load( "main", r#"{"type":"Person","data":{"name":"PreDrop","age":42}}"#, @@ -4342,7 +4332,7 @@ async fn schema_apply_route_soft_drops_property_via_http() { .await .unwrap(); } - let pre_version = manifest_dataset_version(&repo).await; + let pre_version = manifest_dataset_version(&graph).await; let (status, payload) = json_response( &app, @@ -4365,7 +4355,7 @@ async fn schema_apply_route_soft_drops_property_via_http() { assert_eq!(payload["applied"], true); // Catalog reflects the drop: `age` is gone from the live schema. 
- let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); assert!( !reopened.catalog().node_types["Person"] .properties @@ -4393,13 +4383,13 @@ async fn schema_apply_route_soft_drops_property_via_http() { #[tokio::test(flavor = "multi_thread")] async fn schema_apply_route_soft_drops_node_type_via_http() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let (status, payload) = json_response( &app, @@ -4421,7 +4411,7 @@ async fn schema_apply_route_soft_drops_node_type_via_http() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); assert!( !reopened.catalog().node_types.contains_key("Company"), "catalog should not contain `Company` after drop" @@ -4434,15 +4424,15 @@ async fn schema_apply_route_soft_drops_node_type_via_http() { #[tokio::test(flavor = "multi_thread")] async fn schema_apply_route_hard_drops_property_with_allow_data_loss() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.load( "main", r#"{"type":"Person","data":{"name":"PreDropHard","age":50}}"#, @@ -4474,7 +4464,7 @@ async fn 
schema_apply_route_hard_drops_property_with_allow_data_loss() { assert_eq!(payload["applied"], true); // Catalog reflects the drop. - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); assert!( !reopened.catalog().node_types["Person"] .properties @@ -4488,7 +4478,10 @@ async fn schema_apply_route_hard_drops_property_with_allow_data_loss() { .find(|s| s["kind"] == "drop_property") .expect("plan should include drop_property step"); let mode = &drop_step["mode"]; - assert_eq!(mode, "hard", "expected hard mode under allow_data_loss=true"); + assert_eq!( + mode, "hard", + "expected hard mode under allow_data_loss=true" + ); } #[tokio::test(flavor = "multi_thread")] @@ -4497,13 +4490,13 @@ async fn schema_apply_route_keeps_drops_soft_without_flag() { // allow_data_loss flag → drops stay Soft (prior column data // remains time-travel-reachable). Pins the default semantics // against accidental Hard promotion. - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let (status, payload) = json_response( &app, @@ -4532,7 +4525,7 @@ async fn schema_apply_route_keeps_drops_soft_without_flag() { .expect("plan should include drop_property step"); let mode = &drop_step["mode"]; assert_eq!(mode, "soft", "expected soft mode without allow_data_loss"); - let _ = repo; + let _ = graph; } #[tokio::test(flavor = "multi_thread")] @@ -4541,17 +4534,17 @@ async fn schema_apply_route_additive_property_preserves_existing_rows() { // AddProperty wasn't pinned with a row-count check anywhere. // Load N rows, apply schema adding nullable property, verify // every row is still readable and the new column is null. 
- let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); // Standard fixture data: 4 Persons + 1 Company. Load it. let pre_count = { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.load( "main", &fs::read_to_string(fixture("test.jsonl")).unwrap(), @@ -4588,8 +4581,9 @@ async fn schema_apply_route_additive_property_preserves_existing_rows() { assert_eq!(payload["applied"], true); // Row count preserved. - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); - let snap = db.snapshot_of(omnigraph::db::ReadTarget::branch("main")) + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + let snap = db + .snapshot_of(omnigraph::db::ReadTarget::branch("main")) .await .unwrap(); let post_count = snap.entry("node:Person").expect("Person").row_count; diff --git a/crates/omnigraph/Cargo.toml b/crates/omnigraph/Cargo.toml index a3cc5df..1fa3436 100644 --- a/crates/omnigraph/Cargo.toml +++ b/crates/omnigraph/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-engine" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "Runtime engine for the Omnigraph graph database." 
license = "MIT" @@ -16,8 +16,8 @@ default = [] failpoints = ["dep:fail", "fail/failpoints"] [dependencies] -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } -omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" } lance = { workspace = true } lance-datafusion = { workspace = true } datafusion = { workspace = true } @@ -51,7 +51,7 @@ chrono = { workspace = true } arc-swap = { workspace = true } [dev-dependencies] -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } tokio = { workspace = true } lance-namespace-impls = { workspace = true } serial_test = "3" diff --git a/crates/omnigraph/src/db/manifest.rs b/crates/omnigraph/src/db/manifest.rs index f31cc4f..7fcf7de 100644 --- a/crates/omnigraph/src/db/manifest.rs +++ b/crates/omnigraph/src/db/manifest.rs @@ -6,6 +6,8 @@ use lance::Dataset; use lance_namespace::models::CreateTableVersionRequest; use omnigraph_compiler::catalog::Catalog; +#[path = "manifest/graph.rs"] +mod graph; #[path = "manifest/layout.rs"] mod layout; #[path = "manifest/metadata.rs"] @@ -18,11 +20,10 @@ mod namespace; mod publisher; #[path = "manifest/recovery.rs"] mod recovery; -#[path = "manifest/repo.rs"] -mod repo; #[path = "manifest/state.rs"] mod state; +use graph::{init_manifest_graph, open_manifest_graph, snapshot_state_at}; use layout::{manifest_uri, open_manifest_dataset, type_name_hash}; pub(crate) use metadata::TableVersionMetadata; #[cfg(test)] @@ -33,11 +34,10 @@ pub(crate) use namespace::open_table_head_for_write; use namespace::{branch_manifest_namespace, staged_table_namespace}; use publisher::{GraphNamespacePublisher, ManifestBatchPublisher}; pub(crate) use recovery::{ - delete_sidecar, has_schema_apply_sidecar, new_sidecar, recover_manifest_drift, 
write_sidecar, RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin, - SidecarTableRegistration, SidecarTombstone, + SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar, + new_sidecar, recover_manifest_drift, write_sidecar, }; -use repo::{init_manifest_repo, open_manifest_repo, snapshot_state_at}; pub use state::SubTableEntry; #[cfg(test)] use state::string_column; @@ -215,12 +215,12 @@ impl ManifestCoordinator { self } - /// Create a new repo at `root_uri` from a catalog. + /// Create a new graph at `root_uri` from a catalog. /// /// Creates per-type Lance datasets and the namespace `__manifest` table. pub async fn init(root_uri: &str, catalog: &Catalog) -> Result { let root = root_uri.trim_end_matches('/'); - let (dataset, known_state) = init_manifest_repo(root, catalog).await?; + let (dataset, known_state) = init_manifest_graph(root, catalog).await?; Ok(Self::from_parts_with_default_publisher( root, @@ -230,10 +230,10 @@ impl ManifestCoordinator { )) } - /// Open an existing repo's manifest. + /// Open an existing graph's manifest. pub async fn open(root_uri: &str) -> Result { let root = root_uri.trim_end_matches('/'); - let (dataset, known_state) = open_manifest_repo(root, None).await?; + let (dataset, known_state) = open_manifest_graph(root, None).await?; Ok(Self::from_parts_with_default_publisher( root, dataset, @@ -242,14 +242,14 @@ impl ManifestCoordinator { )) } - /// Open an existing repo's manifest at a specific branch. + /// Open an existing graph's manifest at a specific branch. 
pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result { if branch == "main" { return Self::open(root_uri).await; } let root = root_uri.trim_end_matches('/'); - let (dataset, known_state) = open_manifest_repo(root, Some(branch)).await?; + let (dataset, known_state) = open_manifest_graph(root, Some(branch)).await?; Ok(Self::from_parts_with_default_publisher( root, dataset, @@ -410,7 +410,7 @@ impl ManifestCoordinator { Ok(descendants) } - /// Root URI of the repo. + /// Root URI of the graph. pub fn root_uri(&self) -> &str { &self.root_uri } diff --git a/crates/omnigraph/src/db/manifest/repo.rs b/crates/omnigraph/src/db/manifest/graph.rs similarity index 98% rename from crates/omnigraph/src/db/manifest/repo.rs rename to crates/omnigraph/src/db/manifest/graph.rs index 90a958b..6c414aa 100644 --- a/crates/omnigraph/src/db/manifest/repo.rs +++ b/crates/omnigraph/src/db/manifest/graph.rs @@ -17,7 +17,7 @@ use super::state::{ ManifestState, SubTableEntry, entries_to_batch, manifest_schema, read_manifest_state, }; -pub(super) async fn init_manifest_repo( +pub(super) async fn init_manifest_graph( root_uri: &str, catalog: &Catalog, ) -> Result<(Dataset, ManifestState)> { @@ -47,7 +47,7 @@ pub(super) async fn init_manifest_repo( Ok((dataset, known_state)) } -pub(super) async fn open_manifest_repo( +pub(super) async fn open_manifest_graph( root_uri: &str, branch: Option<&str>, ) -> Result<(Dataset, ManifestState)> { diff --git a/crates/omnigraph/src/db/manifest/migrations.rs b/crates/omnigraph/src/db/manifest/migrations.rs index c568bef..bbb7995 100644 --- a/crates/omnigraph/src/db/manifest/migrations.rs +++ b/crates/omnigraph/src/db/manifest/migrations.rs @@ -24,8 +24,8 @@ //! Only on open-for-write paths (the publisher's `load_publish_state`). //! Reads are side-effect-free by contract; an old-shape `__manifest` reads //! fine, it just lacks the protections introduced by later versions. -//! 
`init_manifest_repo` stamps the current version at creation, so newly -//! initialized repos never need migration. +//! `init_manifest_graph` stamps the current version at creation, so newly +//! initialized graphs never need migration. //! //! ## Forward-version protection //! @@ -78,7 +78,7 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()> if current > INTERNAL_MANIFEST_SCHEMA_VERSION { return Err(OmniError::manifest(format!( "__manifest is stamped at internal schema v{} but this binary expects v{} \ - — upgrade omnigraph before opening this repo for writes", + — upgrade omnigraph before opening this graph for writes", current, INTERNAL_MANIFEST_SCHEMA_VERSION, ))); } @@ -112,7 +112,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()> async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> { dataset .update_field_metadata() - .update("object_id", [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())]) + .update( + "object_id", + [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())], + ) .map_err(|e| OmniError::Lance(e.to_string()))? 
.await .map_err(|e| OmniError::Lance(e.to_string()))?; @@ -121,10 +124,7 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> { async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> { dataset - .update_schema_metadata([( - INTERNAL_SCHEMA_VERSION_KEY.to_string(), - version.to_string(), - )]) + .update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())]) .await .map_err(|e| OmniError::Lance(e.to_string()))?; Ok(()) diff --git a/crates/omnigraph/src/db/manifest/namespace.rs b/crates/omnigraph/src/db/manifest/namespace.rs index 724b3e5..80d206f 100644 --- a/crates/omnigraph/src/db/manifest/namespace.rs +++ b/crates/omnigraph/src/db/manifest/namespace.rs @@ -230,6 +230,11 @@ impl LanceNamespace for BranchManifestNamespace { metadata: None, properties: None, managed_versioning: Some(true), + // Every table we return from describe_table is physically + // materialized (open_manifest_dataset succeeds), never just + // "declared." See lance-namespace 6.0.1 DescribeTableResponse + // field docs. + is_only_declared: Some(false), }) } @@ -373,6 +378,11 @@ impl LanceNamespace for StagedTableNamespace { metadata: None, properties: None, managed_versioning: Some(true), + // Every table we return from describe_table is physically + // materialized (open_manifest_dataset succeeds), never just + // "declared." See lance-namespace 6.0.1 DescribeTableResponse + // field docs. + is_only_declared: Some(false), }) } diff --git a/crates/omnigraph/src/db/manifest/recovery.rs b/crates/omnigraph/src/db/manifest/recovery.rs index 0d42a85..425499a 100644 --- a/crates/omnigraph/src/db/manifest/recovery.rs +++ b/crates/omnigraph/src/db/manifest/recovery.rs @@ -58,7 +58,7 @@ use super::{ManifestChange, SubTableUpdate, TableRegistration, TableTombstone}; /// into the audit row's `recovery_for_actor` field. pub(crate) const RECOVERY_ACTOR: &str = "omnigraph:recovery"; -/// Subdirectory under the repo root holding sidecar files. 
+/// Subdirectory under the graph root holding sidecar files. pub(crate) const RECOVERY_DIR_NAME: &str = "__recovery"; /// Current sidecar JSON shape version. Bumping this is a breaking change: @@ -142,7 +142,7 @@ pub(crate) struct SidecarTablePin { pub(crate) struct SidecarTableRegistration { /// Stable identifier (`node:Tag`, `edge:WorksAt`, etc.). pub table_key: String, - /// Repo-relative path the manifest will register + /// Graph-relative path the manifest will register /// (e.g. `nodes/{fnv1a64-hex}`); recovery joins this with `root_uri` /// to open the dataset Lance HEAD when constructing the /// accompanying `Update`. @@ -295,7 +295,7 @@ pub(crate) enum SidecarDecision { Abort, } -/// Build the `__recovery/` directory URI under a repo root. +/// Build the `__recovery/` directory URI under a graph root. pub(crate) fn recovery_dir_uri(root_uri: &str) -> String { let trimmed = root_uri.trim_end_matches('/'); format!("{}/{}", trimmed, RECOVERY_DIR_NAME) @@ -1122,7 +1122,7 @@ async fn record_audit( /// the rename so the recovery sweep's roll-forward step sees the new /// catalog. Without this, the disambiguation logic deletes the staging /// files (since manifest still pins the old table set) and leaves the -/// repo with new-schema data on disk but the old `_schema.pg` live — +/// graph with new-schema data on disk but the old `_schema.pg` live — /// real corruption. pub(crate) async fn has_schema_apply_sidecar( root_uri: &str, diff --git a/crates/omnigraph/src/db/manifest/tests.rs b/crates/omnigraph/src/db/manifest/tests.rs index d51a882..effa0b5 100644 --- a/crates/omnigraph/src/db/manifest/tests.rs +++ b/crates/omnigraph/src/db/manifest/tests.rs @@ -1393,7 +1393,10 @@ async fn test_concurrent_publish_with_overlapping_expected_versions_one_succeeds // version (no duplicate version rows). 
let mc = ManifestCoordinator::open(uri).await.unwrap(); let entry = mc.snapshot().entry("node:Person").unwrap().clone(); - assert!(entry.table_version > 1, "Person should have advanced past v=1"); + assert!( + entry.table_version > 1, + "Person should have advanced past v=1" + ); } #[tokio::test] @@ -1418,7 +1421,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() { let catalog = build_test_catalog(); let mc = ManifestCoordinator::init(uri, &catalog).await.unwrap(); - // Simulate a v1 (pre-stamp) repo by removing the schema-level stamp on disk. + // Simulate a v1 (pre-stamp) graph by removing the schema-level stamp on disk. { let mut ds = open_manifest_dataset(uri, None).await.unwrap(); ds.update_schema_metadata([( @@ -1449,7 +1452,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() { assert_eq!( super::migrations::read_stamp(&post), super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - "publish on a v1 repo should leave the manifest stamped at the current version", + "publish on a v1 graph should leave the manifest stamped at the current version", ); // Manifest should still serve correctly post-migration. diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index 610be62..30a8f14 100644 --- a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -166,7 +166,7 @@ pub enum OpenMode { } impl Omnigraph { - /// Create a new repo at `uri` from schema source. + /// Create a new graph at `uri` from schema source. /// /// Creates `_schema.pg`, per-type Lance datasets, and `__manifest`. pub async fn init(uri: &str, schema_source: &str) -> Result { @@ -205,7 +205,7 @@ impl Omnigraph { }) } - /// Open an existing repo (read-write). + /// Open an existing graph (read-write). /// /// Reads `_schema.pg`, parses it, builds the catalog, and opens `__manifest`. /// Runs the open-time recovery sweep before returning — see [`OpenMode`]. 
@@ -213,7 +213,7 @@ impl Omnigraph { Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadWrite).await } - /// Open an existing repo for read-only consumers (NDJSON export, + /// Open an existing graph for read-only consumers (NDJSON export, /// `commit list`, etc.). Skips the recovery sweep — see [`OpenMode`]. pub async fn open_read_only(uri: &str) -> Result { Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadOnly).await @@ -397,7 +397,8 @@ impl Omnigraph { desired_schema_source: &str, options: SchemaApplyOptions, ) -> Result { - self.apply_schema_as(desired_schema_source, options, None).await + self.apply_schema_as(desired_schema_source, options, None) + .await } /// Apply a schema migration with an explicit actor for engine-layer @@ -470,7 +471,7 @@ impl Omnigraph { Arc::clone(&self.merge_exclusive) } - /// Engine-level access to the repo's normalized root URI. Used by + /// Engine-level access to the graph's normalized root URI. Used by /// the recovery sidecar protocol to compute `__recovery/` paths. pub(crate) fn root_uri(&self) -> &str { &self.root_uri @@ -510,9 +511,10 @@ impl Omnigraph { let normalized = normalize_branch_name(branch.unwrap_or("main"))?; let coord = self.coordinator.read().await; if normalized.as_deref() == coord.current_branch() { - let snapshot_id = coord.head_commit_id().await?.unwrap_or_else(|| { - SnapshotId::synthetic(coord.current_branch(), coord.version()) - }); + let snapshot_id = coord + .head_commit_id() + .await? + .unwrap_or_else(|| SnapshotId::synthetic(coord.current_branch(), coord.version())); return Ok(ResolvedTarget { requested, branch: coord.current_branch().map(str::to_string), @@ -587,7 +589,7 @@ impl Omnigraph { /// exist. Required BEFORE manifest-drift recovery so a /// SchemaApply roll-forward doesn't publish the manifest while /// the staging files remain unrenamed (which would corrupt the - /// repo: data on new schema, catalog on old). 
+ /// graph: data on new schema, catalog on old). /// 3. `recover_manifest_drift(... RollForwardOnly)` — close the /// finalize→publisher residual via roll-forward; defer rollback /// work to next ReadWrite open. @@ -668,7 +670,11 @@ impl Omnigraph { pub async fn resolve_snapshot(&self, branch: &str) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await.resolve_snapshot_id(branch).await + self.coordinator + .read() + .await + .resolve_snapshot_id(branch) + .await } pub(crate) async fn resolved_target( @@ -676,7 +682,11 @@ impl Omnigraph { target: impl Into, ) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await.resolve_target(&target.into()).await + self.coordinator + .read() + .await + .resolve_target(&target.into()) + .await } // ─── Change detection ──────────────────────────────────────────────── @@ -708,7 +718,9 @@ impl Omnigraph { filter: &crate::changes::ChangeFilter, ) -> Result { let coord = self.coordinator.read().await; - let from_commit = coord.resolve_commit(&SnapshotId::new(from_commit_id)).await?; + let from_commit = coord + .resolve_commit(&SnapshotId::new(from_commit_id)) + .await?; let to_commit = coord.resolve_commit(&SnapshotId::new(to_commit_id)).await?; let from_snap = coord .resolve_target(&ReadTarget::Snapshot(SnapshotId::new( @@ -753,7 +765,11 @@ impl Omnigraph { /// Create a Snapshot at any historical manifest version. 
pub async fn snapshot_at_version(&self, version: u64) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await.snapshot_at_version(version).await + self.coordinator + .read() + .await + .snapshot_at_version(version) + .await } pub async fn export_jsonl( @@ -894,11 +910,20 @@ impl Omnigraph { } pub(crate) async fn active_branch(&self) -> Option { - self.coordinator.read().await.current_branch().map(str::to_string) + self.coordinator + .read() + .await + .current_branch() + .map(str::to_string) } async fn ensure_branch_delete_safe(&self, branch: &str, branches: &[String]) -> Result<()> { - let descendants = self.coordinator.read().await.branch_descendants(branch).await?; + let descendants = self + .coordinator + .read() + .await + .branch_descendants(branch) + .await?; if let Some(descendant) = descendants.first() { return Err(OmniError::manifest_conflict(format!( "cannot delete branch '{}' because descendant branch '{}' still depends on it", @@ -954,7 +979,12 @@ impl Omnigraph { } async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> { - let active = self.coordinator.read().await.current_branch().map(str::to_string); + let active = self + .coordinator + .read() + .await + .current_branch() + .map(str::to_string); if active.as_deref() == Some(branch) { return Err(OmniError::manifest_conflict(format!( "cannot delete currently active branch '{}'", @@ -1013,11 +1043,7 @@ impl Omnigraph { self.coordinator.write().await.branch_create(name).await } - pub async fn branch_create_from( - &self, - from: impl Into, - name: &str, - ) -> Result<()> { + pub async fn branch_create_from(&self, from: impl Into, name: &str) -> Result<()> { self.branch_create_from_as(from, name, None).await } @@ -1134,7 +1160,9 @@ impl Omnigraph { pub async fn get_commit(&self, commit_id: &str) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await + self.coordinator + .read() + .await 
.resolve_commit(&SnapshotId::new(commit_id)) .await } diff --git a/crates/omnigraph/src/db/omnigraph/optimize.rs b/crates/omnigraph/src/db/omnigraph/optimize.rs index 4d0f0ce..e158dc7 100644 --- a/crates/omnigraph/src/db/omnigraph/optimize.rs +++ b/crates/omnigraph/src/db/omnigraph/optimize.rs @@ -1,7 +1,7 @@ //! Lance compaction + version cleanup exposed at the graph level. //! //! Lance accumulates many small `.lance` fragment files per table over the -//! life of a repo: each `write`, `load`, and `change` op appends one or more +//! life of a graph: each `write`, `load`, and `change` op appends one or more //! fragments and a new manifest. Over long timescales this hurts open times //! and S3 object counts without improving anything. //! @@ -176,10 +176,9 @@ pub async fn cleanup_all_tables( clean_referenced_branches: false, delete_rate_limit: None, }; - let removed: RemovalStats = - lance::dataset::cleanup::cleanup_old_versions(&ds, policy) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; + let removed: RemovalStats = lance::dataset::cleanup::cleanup_old_versions(&ds, policy) + .await + .map_err(|e| OmniError::Lance(e.to_string()))?; Ok(TableCleanupStats { table_key, bytes_removed: removed.bytes_removed, @@ -198,12 +197,7 @@ fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec .node_types .keys() .map(|n| format!("node:{}", n)) - .chain( - catalog - .edge_types - .keys() - .map(|n| format!("edge:{}", n)), - ) + .chain(catalog.edge_types.keys().map(|n| format!("edge:{}", n))) .collect(); keys.sort(); keys diff --git a/crates/omnigraph/src/db/omnigraph/schema_apply.rs b/crates/omnigraph/src/db/omnigraph/schema_apply.rs index 6073f6f..0dcf0f9 100644 --- a/crates/omnigraph/src/db/omnigraph/schema_apply.rs +++ b/crates/omnigraph/src/db/omnigraph/schema_apply.rs @@ -97,7 +97,7 @@ pub(super) async fn apply_schema_with_lock( // Skip `main` and internal system branches. 
The schema-apply lock branch // is excluded because it is the cluster-wide schema-apply serializer. // `__run__*` branches are no longer created; the filter remains as - // defense-in-depth for legacy repos with leftover staging branches. + // defense-in-depth for legacy graphs with leftover staging branches. // A future production sweep will let this guard go. let blocking_branches = branches .into_iter() @@ -105,7 +105,7 @@ pub(super) async fn apply_schema_with_lock( .collect::>(); if !blocking_branches.is_empty() { return Err(OmniError::manifest_conflict(format!( - "schema apply requires a repo with only main; found non-main branches: {}", + "schema apply requires a graph with only main; found non-main branches: {}", blocking_branches.join(", ") ))); } @@ -780,7 +780,7 @@ pub(super) async fn acquire_schema_apply_lock(db: &Omnigraph) -> Result<()> { if !blocking_branches.is_empty() { let _ = release_schema_apply_lock(db).await; return Err(OmniError::manifest_conflict(format!( - "schema apply requires a repo with only main; found non-main branches: {}", + "schema apply requires a graph with only main; found non-main branches: {}", blocking_branches.join(", ") ))); } diff --git a/crates/omnigraph/src/db/recovery_audit.rs b/crates/omnigraph/src/db/recovery_audit.rs index b7d4975..b9e8e7b 100644 --- a/crates/omnigraph/src/db/recovery_audit.rs +++ b/crates/omnigraph/src/db/recovery_audit.rs @@ -93,7 +93,7 @@ pub(crate) struct RecoveryAudit { } impl RecoveryAudit { - /// Open the recovery-audit dataset for the repo, or return a handle + /// Open the recovery-audit dataset for the graph, or return a handle /// with no dataset yet (created on first append). Mirrors the /// optional-dataset pattern from `_graph_commit_actors.lance`. 
pub(crate) async fn open(root_uri: &str) -> Result { @@ -205,9 +205,7 @@ fn recovery_record_to_batch(record: &RecoveryAuditRecord) -> Result vec![ Arc::new(StringArray::from(vec![record.graph_commit_id.clone()])), Arc::new(StringArray::from(vec![record.recovery_kind.as_str()])), - Arc::new(StringArray::from(vec![record - .recovery_for_actor - .clone()])), + Arc::new(StringArray::from(vec![record.recovery_for_actor.clone()])), Arc::new(StringArray::from(vec![record.operation_id.clone()])), Arc::new(StringArray::from(vec![record.sidecar_writer_kind.clone()])), Arc::new(StringArray::from(vec![outcomes_json])), @@ -221,10 +219,14 @@ fn decode_row(batch: &RecordBatch, row: usize) -> Result { let str_col = |name: &str| -> Result<&StringArray> { batch .column_by_name(name) - .ok_or_else(|| OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)))? + .ok_or_else(|| { + OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)) + })? .as_any() .downcast_ref::() - .ok_or_else(|| OmniError::manifest_internal(format!("column '{}' has wrong type", name))) + .ok_or_else(|| { + OmniError::manifest_internal(format!("column '{}' has wrong type", name)) + }) }; let ts_col = batch .column_by_name("created_at") @@ -269,9 +271,7 @@ pub(crate) fn now_micros() -> Result { SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_micros() as i64) - .map_err(|e| { - OmniError::manifest_internal(format!("system clock before unix epoch: {}", e)) - }) + .map_err(|e| OmniError::manifest_internal(format!("system clock before unix epoch: {}", e))) } #[cfg(test)] @@ -307,7 +307,7 @@ mod tests { let root = dir.path().to_str().unwrap(); let mut audit = RecoveryAudit::open(root).await.unwrap(); - // Empty repo: list returns empty. + // Empty graph: list returns empty. assert!(audit.list().await.unwrap().is_empty()); // Append + list. 
diff --git a/crates/omnigraph/src/db/schema_state.rs b/crates/omnigraph/src/db/schema_state.rs index 13dfccc..b131a16 100644 --- a/crates/omnigraph/src/db/schema_state.rs +++ b/crates/omnigraph/src/db/schema_state.rs @@ -61,7 +61,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract( .collect::>(); if !public_non_main.is_empty() { return Err(schema_lock_conflict(format!( - "repo is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely", + "graph is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely", public_non_main.join(", ") ))); } @@ -70,7 +70,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract( Ok((current_source_ir.clone(), state)) } SchemaContractRead::PartialMissing => Err(schema_lock_conflict( - "repo schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)", + "graph schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)", )), } } @@ -84,7 +84,7 @@ pub(crate) async fn validate_schema_contract( SchemaContractRead::Present { ir, state } => (ir, state), SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => { return Err(schema_lock_conflict( - "repo is missing persisted schema state; manual coordination is required before schema changes are allowed", + "graph is missing persisted schema state; manual coordination is required before schema changes are allowed", )); } }; @@ -163,7 +163,7 @@ pub(crate) async fn read_accepted_schema_ir( } SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => { Err(schema_lock_conflict( - "repo is missing persisted schema state; manual coordination is required before schema changes are allowed", + "graph is missing persisted schema state; manual coordination is required before schema changes are allowed", )) } } @@ -221,7 +221,7 @@ async fn read_schema_contract( })?; 
let state = serde_json::from_str::(&state_json).map_err(|err| { schema_lock_conflict(format!( - "repo schema state in {} is invalid: {}", + "graph schema state in {} is invalid: {}", SCHEMA_STATE_FILENAME, err )) })?; @@ -234,7 +234,7 @@ async fn read_schema_contract( fn validate_persisted_schema_contract(ir: &SchemaIR, state: &SchemaState) -> Result<()> { if state.format_version != SCHEMA_STATE_FORMAT_VERSION { return Err(schema_lock_conflict(format!( - "repo schema state format {} is unsupported", + "graph schema state format {} is unsupported", state.format_version ))); } @@ -344,7 +344,7 @@ pub(crate) async fn recover_schema_state_files( // to the new Lance HEADs; we MUST also rename the staging files // forward so the catalog matches. Without this, the disambiguation // logic below sees actual_keys == live_keys (manifest didn't move) - // and deletes the staging files, leaving the repo with new-schema + // and deletes the staging files, leaving the graph with new-schema // data on disk but the old `_schema.pg` live — corruption. if crate::db::manifest::has_schema_apply_sidecar(root_uri, storage.as_ref()).await? { warn!( diff --git a/crates/omnigraph/src/exec/query.rs b/crates/omnigraph/src/exec/query.rs index 88865d8..24a8722 100644 --- a/crates/omnigraph/src/exec/query.rs +++ b/crates/omnigraph/src/exec/query.rs @@ -1037,8 +1037,16 @@ async fn execute_node_scan( let table_key = format!("node:{}", type_name); let ds = snapshot.open(&table_key).await?; - // Build Lance SQL filter string from non-search IR filters - let filter_sql = build_lance_filter(filters, params); + // Lower the IR filters to a DataFusion `Expr` and apply via + // `Scanner::filter_expr` inside the configure closure. 
The string + // pushdown path (`build_lance_filter` → `scanner.filter(&str)`) is + // gone for node scans — structured Expr unlocks `CompOp::Contains` + // pushdown (via `array_has`) and lets DF 53's optimizer rules + // (vectorized IN-list, PhysicalExprSimplifier, CASE-NULL shortcut) + // reach our predicates. Other call sites that still take string SQL + // (hydrate_nodes for the Expand pushdown, count_rows, the mutation + // delete path) migrate in follow-up MRs. + let filter_expr = build_lance_filter_expr(filters, params); // Blob columns must be excluded from scan when a filter is present // (Lance bug: BlobsDescriptions + filter triggers a projection assertion). @@ -1056,10 +1064,15 @@ async fn execute_node_scan( let batches = crate::table_store::TableStore::scan_stream_with( &ds, projection, - filter_sql.as_deref(), + None, None, false, |scanner| { + // Apply the structured IR filter via Lance's Expr pushdown. + if let Some(ref expr) = filter_expr { + scanner.filter_expr(expr.clone()); + } + // Apply FTS queries from hoisted search filters (search/fuzzy/match_text in match clause) for filter in filters { if is_search_filter(filter) { @@ -1288,6 +1301,125 @@ pub(super) fn literal_to_sql(lit: &Literal) -> String { } } +// --------------------------------------------------------------------------- +// Structured DataFusion-Expr pushdown +// +// Parallel to the `ir_*_to_sql` family above, these helpers lower the same +// IR filter shapes to `datafusion::prelude::Expr` so we can call +// `Scanner::filter_expr(Expr)` instead of `Scanner::filter(&str)`. The +// structured form unlocks two things the string path could not express: +// +// 1. `CompOp::Contains` against list-typed columns (lowered to +// `array_has(col, value)` — requires the `nested_expressions` +// feature on the `datafusion` crate, enabled in the workspace). +// 2. 
Optimizer rules in DataFusion 53 that act on `Expr` shapes +// (vectorized `IN`-list eq kernel, `PhysicalExprSimplifier`, the +// `CASE WHEN x THEN y ELSE NULL` shortcut, etc.). +// +// Search predicates (`is_search_filter`) are still handled separately via +// `scanner.full_text_search(...)`, not via filter_expr — they stay None +// here just like in `ir_filter_to_sql`. The `literal_to_sql` path remains +// because the mutation/update layer (`exec/mutation.rs`) still produces +// SQL strings for `Dataset::delete(&str)`; that migration is MR-A's +// territory (Lance #6658 + delete two-phase). + +/// Convert IR filters to a single DataFusion `Expr` (AND-joined), or +/// `None` if no filter is pushable. +pub(super) fn build_lance_filter_expr( + filters: &[IRFilter], + params: &ParamMap, +) -> Option { + use datafusion::logical_expr::Operator; + use datafusion::prelude::Expr; + + let mut acc: Option = None; + for f in filters { + let Some(e) = ir_filter_to_expr(f, params) else { + continue; + }; + acc = Some(match acc { + None => e, + Some(prev) => Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr::new( + Box::new(prev), + Operator::And, + Box::new(e), + )), + }); + } + acc +} + +/// Convert a single IR filter to a DataFusion `Expr`. Returns `None` for +/// search-mode filters (handled via `scanner.full_text_search`) or any +/// expression shape we can't pushdown. +pub(super) fn ir_filter_to_expr( + filter: &IRFilter, + params: &ParamMap, +) -> Option { + use datafusion::functions_nested::expr_fn::array_has; + + if is_search_filter(filter) { + return None; + } + + // List-contains: `prop CONTAINS value` lowers to `array_has(prop, value)`. + // This is the case `ir_filter_to_sql` had to return None for ("Can't + // pushdown list contains"); with structured Expr it pushes down fine. 
+ if matches!(filter.op, CompOp::Contains) { + let left = ir_expr_to_expr(&filter.left, params)?; + let right = ir_expr_to_expr(&filter.right, params)?; + return Some(array_has(left, right)); + } + + let left = ir_expr_to_expr(&filter.left, params)?; + let right = ir_expr_to_expr(&filter.right, params)?; + Some(match filter.op { + CompOp::Eq => left.eq(right), + CompOp::Ne => left.not_eq(right), + CompOp::Gt => left.gt(right), + CompOp::Lt => left.lt(right), + CompOp::Ge => left.gt_eq(right), + CompOp::Le => left.lt_eq(right), + CompOp::Contains => unreachable!("handled above"), + }) +} + +/// Convert an IR expression to a DataFusion `Expr`. Returns `None` for +/// shapes we don't support in pushdown (search funcs, RRF, aggregates, +/// variable refs that aren't a property access). +pub(super) fn ir_expr_to_expr( + expr: &IRExpr, + params: &ParamMap, +) -> Option { + use datafusion::prelude::{col, lit}; + match expr { + IRExpr::PropAccess { property, .. } => Some(col(property)), + IRExpr::Literal(l) => literal_to_expr(l), + IRExpr::Param(name) => params.get(name).and_then(literal_to_expr), + _ => None, + } +} + +/// Convert a Literal to a DataFusion `Expr`. Returns `None` for List +/// (which the existing SQL path also can't pushdown — falls through to +/// post-scan in-memory application). +fn literal_to_expr(lit: &Literal) -> Option { + use datafusion::prelude::lit as df_lit; + Some(match lit { + Literal::Null => df_lit(datafusion::scalar::ScalarValue::Null), + Literal::String(s) => df_lit(s.clone()), + Literal::Integer(n) => df_lit(*n), + Literal::Float(f) => df_lit(*f), + Literal::Bool(b) => df_lit(*b), + // Date/DateTime stored as strings; pass through as string literals + // — Lance/DataFusion handles the comparison against typed columns + // via implicit cast, matching the existing string-SQL behavior. 
+ Literal::Date(s) => df_lit(s.clone()), + Literal::DateTime(s) => df_lit(s.clone()), + Literal::List(_) => return None, + }) +} + fn prefix_batch(batch: &RecordBatch, variable: &str) -> Result { let fields: Vec = batch.schema().fields().iter().map(|f| { Field::new(format!("{}.{}", variable, f.name()), f.data_type().clone(), f.is_nullable()) diff --git a/crates/omnigraph/src/exec/staging.rs b/crates/omnigraph/src/exec/staging.rs index ad39bc0..0d26fd3 100644 --- a/crates/omnigraph/src/exec/staging.rs +++ b/crates/omnigraph/src/exec/staging.rs @@ -26,10 +26,10 @@ use arrow_schema::SchemaRef; use lance::Dataset; use omnigraph_compiler::catalog::EdgeType; -use crate::db::{MutationOpKind, SubTableUpdate}; use crate::db::manifest::{ - new_sidecar, write_sidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin, + RecoverySidecarHandle, SidecarKind, SidecarTablePin, new_sidecar, write_sidecar, }; +use crate::db::{MutationOpKind, SubTableUpdate}; use crate::error::{OmniError, Result}; /// Whether the per-table accumulator should commit via `stage_append` @@ -119,10 +119,12 @@ impl MutationStaging { expected_version: u64, op_kind: MutationOpKind, ) { - self.paths.entry(table_key.to_string()).or_insert(StagedTablePath { - full_path, - table_branch, - }); + self.paths + .entry(table_key.to_string()) + .or_insert(StagedTablePath { + full_path, + table_branch, + }); self.expected_versions .entry(table_key.to_string()) .or_insert(expected_version); @@ -202,7 +204,8 @@ impl MutationStaging { /// Record a delete that already inline-committed at the Lance layer. 
pub(crate) fn record_inline(&mut self, update: SubTableUpdate) { - self.inline_committed.insert(update.table_key.clone(), update); + self.inline_committed + .insert(update.table_key.clone(), update); } /// Read-your-writes accessor: the accumulated pending batches for @@ -308,18 +311,13 @@ impl MutationStaging { // mode is exempt because no-key node and edge inserts use // ULID-generated ids that are unique within a query. let combined = match table.mode { - PendingMode::Merge => { - dedupe_merge_batches_by_id(&table.schema, table.batches)? - } + PendingMode::Merge => dedupe_merge_batches_by_id(&table.schema, table.batches)?, PendingMode::Append => { if table.batches.len() == 1 { table.batches.into_iter().next().unwrap() } else { - arrow_select::concat::concat_batches( - &table.schema, - &table.batches, - ) - .map_err(|e| OmniError::Lance(e.to_string()))? + arrow_select::concat::concat_batches(&table.schema, &table.batches) + .map_err(|e| OmniError::Lance(e.to_string()))? } } }; @@ -327,9 +325,7 @@ impl MutationStaging { // Stage produces uncommitted fragments + transaction. No // Lance HEAD advance until `commit_all` runs `commit_staged`. let staged = match table.mode { - PendingMode::Append => { - db.table_store().stage_append(&ds, combined, &[]).await? - } + PendingMode::Append => db.table_store().stage_append(&ds, combined, &[]).await?, PendingMode::Merge => { db.table_store() .stage_merge_insert( @@ -420,7 +416,7 @@ impl StagedMutation { /// /// Revalidation: between `stage_all` and `commit_all`, another /// writer (in the same process or another process sharing the - /// repo) may have committed to one of our touched tables, advancing + /// graph) may have committed to one of our touched tables, advancing /// the manifest pin past our `expected_version`. 
We revalidate /// under the queue and fail-fast with `manifest_conflict` before /// any `commit_staged` so the orphaned uncommitted fragments stay @@ -462,9 +458,8 @@ impl StagedMutation { // from interleaving between our delete and our publish, which // would otherwise leave a Lance-HEAD-ahead residual the // delete-only sidecar (added below) would have to recover. - let mut queue_keys: Vec<(String, Option)> = Vec::with_capacity( - staged.len() + inline_committed.len(), - ); + let mut queue_keys: Vec<(String, Option)> = + Vec::with_capacity(staged.len() + inline_committed.len()); for entry in &staged { queue_keys.push((entry.table_key.clone(), entry.path.table_branch.clone())); } @@ -565,9 +560,8 @@ impl StagedMutation { // Finding 3 hazard: delete-only mutations would otherwise skip // the sidecar, leaving any commit→publish residual unreachable // by recovery. - let mut pins: Vec = Vec::with_capacity( - staged.len() + inline_committed.len(), - ); + let mut pins: Vec = + Vec::with_capacity(staged.len() + inline_committed.len()); for entry in &staged { pins.push(SidecarTablePin { table_key: entry.table_key.clone(), @@ -899,10 +893,7 @@ pub(crate) async fn count_src_per_edge( /// Count pending edges per `src` with NO dedup. Correct when caller /// guarantees pending rows have unique primary keys (engine inserts via /// fresh ULID; loader Append mode). 
-fn count_pending_src_naive( - pending_batches: &[RecordBatch], - counts: &mut HashMap, -) { +fn count_pending_src_naive(pending_batches: &[RecordBatch], counts: &mut HashMap) { for batch in pending_batches { let Some(col) = batch.column_by_name("src") else { continue; @@ -947,12 +938,15 @@ fn count_pending_src_with_dedupe( dedupe_key_column ))); }; - let key_arr = key_col.as_any().downcast_ref::().ok_or_else(|| { - OmniError::Lance(format!( - "count_src_per_edge: pending '{}' column is not Utf8", - dedupe_key_column - )) - })?; + let key_arr = key_col + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OmniError::Lance(format!( + "count_src_per_edge: pending '{}' column is not Utf8", + dedupe_key_column + )) + })?; let src_arr = batch .column_by_name("src") .and_then(|c| c.as_any().downcast_ref::()); diff --git a/crates/omnigraph/src/lib.rs b/crates/omnigraph/src/lib.rs index d781096..ff0b3d6 100644 --- a/crates/omnigraph/src/lib.rs +++ b/crates/omnigraph/src/lib.rs @@ -1,3 +1,12 @@ +// Lance 6's trait surface (heavier futures/streams nesting around the +// staged-write API in `storage_layer.rs`) pushes us past the default +// trait-resolution recursion limit of 128 on Linux builds. Raising to +// 256 here is the upstream-suggested fix from rustc itself +// ("consider increasing the recursion limit"). macOS happens to short- +// circuit before tripping the limit; CI on Linux does not. Revisit if +// future Lance bumps stop needing this. +#![recursion_limit = "256"] + pub mod changes; pub mod db; pub mod embedding; diff --git a/crates/omnigraph/src/storage.rs b/crates/omnigraph/src/storage.rs index 5d2e568..e90c693 100644 --- a/crates/omnigraph/src/storage.rs +++ b/crates/omnigraph/src/storage.rs @@ -66,7 +66,7 @@ impl StorageAdapter for LocalStorageAdapter { // Ensure parent directory exists. S3 has no equivalent (PutObject // is path-agnostic). 
For local fs, callers like the recovery // sidecar protocol expect transparent directory creation under - // the repo root (the `__recovery/` directory doesn't pre-exist; + // the graph root (the `__recovery/` directory doesn't pre-exist; // first sidecar write creates it). if let Some(parent) = path.parent() { if !parent.as_os_str().is_empty() { @@ -398,10 +398,13 @@ mod tests { #[test] fn storage_backend_selection_is_scheme_aware() { - assert_eq!(storage_kind_for_uri("/tmp/repo"), StorageKind::Local); - assert_eq!(storage_kind_for_uri("file:///tmp/repo"), StorageKind::Local); + assert_eq!(storage_kind_for_uri("/tmp/graph"), StorageKind::Local); assert_eq!( - storage_kind_for_uri("s3://omnigraph-preview/repo"), + storage_kind_for_uri("file:///tmp/graph"), + StorageKind::Local + ); + assert_eq!( + storage_kind_for_uri("s3://omnigraph-preview/graph"), StorageKind::S3 ); } @@ -440,8 +443,8 @@ mod tests { #[test] fn parse_s3_uri_splits_bucket_and_key() { - let location = parse_s3_uri("s3://bucket/repo/_schema.pg").unwrap(); + let location = parse_s3_uri("s3://bucket/graph/_schema.pg").unwrap(); assert_eq!(location.bucket, "bucket"); - assert_eq!(location.key, "repo/_schema.pg"); + assert_eq!(location.key, "graph/_schema.pg"); } } diff --git a/crates/omnigraph/src/storage_layer.rs b/crates/omnigraph/src/storage_layer.rs index 1efe940..b0fc042 100644 --- a/crates/omnigraph/src/storage_layer.rs +++ b/crates/omnigraph/src/storage_layer.rs @@ -10,11 +10,15 @@ //! ## Transitional residuals on the trait //! //! Several inline-commit methods remain on the trait surface as -//! documented residuals: `delete_where` (Lance 4.0.0's `DeleteJob` is -//! `pub(crate)` — see [#6658](https://github.com/lance-format/lance/issues/6658)), +//! documented residuals: `delete_where` +//! ([#6658](https://github.com/lance-format/lance/issues/6658) closed +//! 2026-05-14, but the public `DeleteBuilder::execute_uncommitted` API +//! 
did not backport to the 6.x release line — it first ships in +//! `v7.0.0-beta.10`. Migration to staged two-phase delete is tracked as +//! MR-A and is gated on the Lance v7.x bump, not the current v6.0.1 pin), //! `create_vector_index` (segment-commit-path requires //! `build_index_metadata_from_segments` which is `pub(crate)` — see -//! [#6666](https://github.com/lance-format/lance/issues/6666)), and the +//! [#6666](https://github.com/lance-format/lance/issues/6666), still open), and the //! legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` / //! `create_btree_index` / `create_inverted_index` paths kept while //! engine call sites finish migrating off of them (Phase 1b / Phase 9 diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index 22d8148..c896b05 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -8,15 +8,17 @@ use lance::Dataset; use lance::blob::BlobArrayBuilder; use lance::dataset::scanner::{ColumnOrdering, DatasetRecordBatchStream, Scanner}; use lance::dataset::transaction::{Operation, Transaction, TransactionBuilder}; +use lance::dataset::write::merge_insert::SourceDedupeBehavior; use lance::dataset::{ CommitBuilder, InsertBuilder, MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams, }; use lance::datatypes::BlobKind; +use lance::index::DatasetIndexExt; use lance::index::scalar::IndexDetails; use lance_file::version::LanceFileVersion; use lance_index::scalar::{InvertedIndexParams, ScalarIndexParams}; -use lance_index::{DatasetIndexExt, IndexType, is_system_index}; +use lance_index::{IndexType, is_system_index}; use lance_linalg::distance::MetricType; use lance_table::format::{Fragment, IndexMetadata, RowIdMeta}; use lance_table::rowids::{RowIdSequence, write_row_ids}; @@ -651,15 +653,58 @@ impl TableStore { return self.table_state(dataset_uri, &ds).await; } + // Precondition for the FirstSeen workaround below: every caller of + // this 
primitive must hand in a source batch that is unique by + // `key_columns`. Without this check, `SourceDedupeBehavior::FirstSeen` + // would silently collapse genuine duplicates instead of erroring. + check_batch_unique_by_keys(&batch, &key_columns, "merge_insert_batch")?; + // TODO(lance-upstream): MergeInsertBuilder does not accept WriteParams, // so allow_external_blob_outside_bases cannot be set here. External URI // blobs via merge_insert (LoadMode::Merge, mutations) are unsupported // until Lance exposes WriteParams on MergeInsertBuilder. let ds = Arc::new(ds); - let job = MergeInsertBuilder::try_new(ds, key_columns) - .map_err(|e| OmniError::Lance(e.to_string()))? - .when_matched(when_matched) - .when_not_matched(when_not_matched) + let mut builder = MergeInsertBuilder::try_new(ds, key_columns) + .map_err(|e| OmniError::Lance(e.to_string()))?; + builder.when_matched(when_matched); + builder.when_not_matched(when_not_matched); + // Workaround for a Lance 4.0.x bug class where sequential + // merge_insert calls against rows previously rewritten by + // merge_insert produce a spurious "Ambiguous merge inserts: + // multiple source rows match the same target row on (id = ...)" + // error. Lance's `processed_row_ids: Mutex>` + // (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`) + // double-processes the same source/target match against + // datasets previously rewritten by merge_insert, and the default + // `SourceDedupeBehavior::Fail` errors on the second insertion. + // `FirstSeen` makes Lance skip the duplicate match instead. + // + // Covers both observed surfaces: + // - PR #98 (sequential `load --mode merge` against same keys). + // - MR-920 (sequential `update T set {f} where x=y` on same row). 
+ // + // Correctness-preserving for OmniGraph because every call path + // that reaches this primitive either pre-dedupes the source batch + // by id, or surfaces a real source dup via the + // `check_batch_unique_by_keys` precondition above (which fires + // before the FirstSeen setter has a chance to silently collapse + // anything): + // - Load path: `enforce_unique_constraints_intra_batch` + // (`loader/mod.rs:1453`) errors on intra-batch `@key` dups. + // - Mutate path: `MutationStaging::finalize` (`exec/staging.rs`) + // accumulates and dedupes by `id`. + // - Branch-merge path: `compute_source_delta` / + // `compute_three_way_delta` (`exec/merge.rs`) walk via + // `OrderedTableCursor` and `push_row` each id at most once. + // So FirstSeen only suppresses the spurious Lance behavior, never + // user data. Pinned by `loader_rejects_intra_batch_duplicate_keys` + // in `tests/consistency.rs` plus the + // `check_batch_unique_by_keys` precondition. + // + // Retire when upstream Lance fixes the bug class. Tracked at + // MR-957; upstream: lance-format/lance#6877. + builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen); + let job = builder .try_build() .map_err(|e| OmniError::Lance(e.to_string()))?; @@ -870,11 +915,26 @@ impl TableStore { "stage_merge_insert called with empty batch".to_string(), )); } + + // Precondition for FirstSeen below. See the comment on + // `merge_insert_batch` for why this check is here, not on the caller: + // every call path that reaches stage_merge_insert (load, + // MutationStaging::finalize, branch_merge::publish_rewritten_merge_table) + // must hand in a source batch that is unique by `key_columns`. + check_batch_unique_by_keys(&batch, &key_columns, "stage_merge_insert")?; + let ds = Arc::new(ds); - let job = MergeInsertBuilder::try_new(ds, key_columns) - .map_err(|e| OmniError::Lance(e.to_string()))? 
- .when_matched(when_matched) - .when_not_matched(when_not_matched) + let mut builder = MergeInsertBuilder::try_new(ds, key_columns) + .map_err(|e| OmniError::Lance(e.to_string()))?; + builder.when_matched(when_matched); + builder.when_not_matched(when_not_matched); + // See `merge_insert_batch` for the FirstSeen rationale. Workaround + // for the Lance 4.0.x bug class where sequential merge_insert / + // update against rows previously rewritten by merge_insert trips + // Lance's `processed_row_ids` HashSet and errors under the default + // `SourceDedupeBehavior::Fail`. Retire when upstream Lance is fixed. + builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen); + let job = builder .try_build() .map_err(|e| OmniError::Lance(e.to_string()))?; let schema = batch.schema(); @@ -1651,3 +1711,107 @@ fn combine_committed_with_staged(ds: &Dataset, staged: &[StagedWrite]) -> Vec Result<()> { + if key_columns.len() != 1 { + return Err(OmniError::manifest_internal(format!( + "{}: check_batch_unique_by_keys currently supports single-column keys only, got {:?}", + context, key_columns + ))); + } + let key_col_name = &key_columns[0]; + let column = batch.column_by_name(key_col_name).ok_or_else(|| { + OmniError::manifest_internal(format!( + "{}: source batch missing key column '{}'", + context, key_col_name + )) + })?; + let strs = column + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OmniError::manifest_internal(format!( + "{}: key column '{}' is not a StringArray (got {:?})", + context, + key_col_name, + column.data_type() + )) + })?; + + let mut seen: std::collections::HashSet<&str> = + std::collections::HashSet::with_capacity(batch.num_rows()); + for i in 0..strs.len() { + if !strs.is_valid(i) { + continue; + } + let v = strs.value(i); + if !seen.insert(v) { + return Err(OmniError::manifest(format!( + "{}: duplicate source row for key '{}' (column '{}'); \ + callers must hand in a batch unique by `key_columns` \ + — see MR-957", + context, v, key_col_name + 
))); + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::StringArray; + use arrow_schema::{DataType, Field, Schema}; + + fn batch_with_ids(ids: &[&str]) -> RecordBatch { + let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)])); + let col = Arc::new(StringArray::from(ids.to_vec())) as ArrayRef; + RecordBatch::try_new(schema, vec![col]).unwrap() + } + + #[test] + fn check_batch_unique_by_keys_passes_when_all_unique() { + let batch = batch_with_ids(&["a", "b", "c"]); + check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap(); + } + + #[test] + fn check_batch_unique_by_keys_errors_on_duplicate_id() { + let batch = batch_with_ids(&["a", "b", "a"]); + let err = + check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("duplicate source row for key 'a'"), + "unexpected error: {msg}" + ); + assert!(msg.contains("MR-957"), "error should reference MR-957: {msg}"); + } + + #[test] + fn check_batch_unique_by_keys_rejects_multi_column_keys() { + let batch = batch_with_ids(&["a"]); + let err = check_batch_unique_by_keys( + &batch, + &["id".to_string(), "other".to_string()], + "test", + ) + .unwrap_err(); + assert!(err.to_string().contains("single-column keys only")); + } +} diff --git a/crates/omnigraph/tests/branching.rs b/crates/omnigraph/tests/branching.rs index 4d292f7..5a0c47d 100644 --- a/crates/omnigraph/tests/branching.rs +++ b/crates/omnigraph/tests/branching.rs @@ -4,7 +4,8 @@ use std::fs; use arrow_array::{Array, Int32Array, UInt64Array}; use futures::TryStreamExt; -use lance_index::{DatasetIndexExt, is_system_index}; +use lance::index::DatasetIndexExt; +use lance_index::is_system_index; use omnigraph::db::commit_graph::CommitGraph; use omnigraph::db::{MergeOutcome, Omnigraph, ReadTarget}; diff --git a/crates/omnigraph/tests/composite_flow.rs b/crates/omnigraph/tests/composite_flow.rs index 63ec8b2..6c720da 100644 --- 
a/crates/omnigraph/tests/composite_flow.rs +++ b/crates/omnigraph/tests/composite_flow.rs @@ -56,7 +56,7 @@ async fn composite_flow_canonical_lifecycle() { let uri = dir.path().to_str().unwrap(); // ───────────────────────────────────────────────────────────────── - // Step 1: init a fresh repo with the standard test schema. + // Step 1: init a fresh graph with the standard test schema. // ───────────────────────────────────────────────────────────────── let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); let v_init = version_branch(&db, "main").await.unwrap(); @@ -70,7 +70,9 @@ async fn composite_flow_canonical_lifecycle() { // Step 2: load JSONL seed data (Person + Company nodes, // Knows + WorksAt edges). // ───────────────────────────────────────────────────────────────── - load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Append) + .await + .unwrap(); let v_after_load = version_branch(&db, "main").await.unwrap(); assert!( v_after_load > v_init, @@ -119,19 +121,13 @@ async fn composite_flow_canonical_lifecycle() { "feature", MUTATION_QUERIES, "insert_person_and_friend", - &mixed_params( - &[("$name", "Frank"), ("$friend", "Eve")], - &[("$age", 33)], - ), + &mixed_params(&[("$name", "Frank"), ("$friend", "Eve")], &[("$age", 33)]), ) .await .expect("multi-statement insert+edge on feature"); // After: feature has 4 + Eve + Frank = 6 Persons. - let snap = db - .snapshot_of(ReadTarget::branch("feature")) - .await - .unwrap(); + let snap = db.snapshot_of(ReadTarget::branch("feature")).await.unwrap(); let person_ds = snap.open("node:Person").await.unwrap(); assert_eq!( person_ds.count_rows(None).await.unwrap(), @@ -321,14 +317,10 @@ async fn composite_flow_canonical_lifecycle() { ); // Re-run a query to verify post-optimize correctness. 
- let post_optimize_total = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); + let post_optimize_total = + query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); assert!( !post_optimize_total.batches().is_empty(), "queries must still work after optimize" @@ -385,14 +377,9 @@ async fn composite_flow_canonical_lifecycle() { // post-cleanup. Post-cleanup mutation is omitted here pending // resolution of the optimize-vs-manifest-pin interaction documented // in Step 10. - let final_total = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); + let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); assert!(!final_total.batches().is_empty()); } @@ -431,10 +418,12 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() { // Step 1: init + load on handle A. let mut db_a = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); - load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append).await.unwrap(); + load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append) + .await + .unwrap(); assert_eq!(count_rows(&db_a, "node:Person").await, 4); - // Step 2: open handle B on the same repo. B's in-memory schema_source + // Step 2: open handle B on the same graph. B's in-memory schema_source // cache is now a snapshot of `_schema.pg` at open time. let db_b = Omnigraph::open(uri).await.unwrap(); @@ -444,7 +433,7 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() { // to disk. 
const TEST_SCHEMA_V2: &str = "node Person {\n name: String @key\n age: I32?\n nickname: String?\n}\n\nnode Company {\n name: String @key\n}\n\nedge Knows: Person -> Person {\n since: Date?\n}\n\nedge WorksAt: Person -> Company\n"; let plan = db_a.apply_schema(TEST_SCHEMA_V2).await.unwrap(); - assert!(plan.applied, "apply_schema must succeed on a clean repo"); + assert!(plan.applied, "apply_schema must succeed on a clean graph"); assert!( !plan.steps.is_empty(), "apply_schema must record the AddProperty step" @@ -561,7 +550,9 @@ async fn composite_flow_multi_branch_sequential_merges() { // edges from test.jsonl). // ───────────────────────────────────────────────────────────────── let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); - load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Append) + .await + .unwrap(); assert_eq!(count_rows(&db, "node:Person").await, 4); assert_eq!(count_rows(&db, "edge:Knows").await, 3); @@ -687,10 +678,7 @@ async fn composite_flow_multi_branch_sequential_merges() { "feat-a", MUTATION_QUERIES, "insert_person_and_friend", - &mixed_params( - &[("$name", "Grace"), ("$friend", "Eve")], - &[("$age", 28)], - ), + &mixed_params(&[("$name", "Grace"), ("$friend", "Eve")], &[("$age", 28)]), ) .await .expect("insert Grace + Knows(Grace → Eve) on feat-a"); @@ -821,15 +809,14 @@ async fn composite_flow_multi_branch_sequential_merges() { // `total_people` returns count(Person) = 10. Catches regressions in // group-by/count execution against a multi-fragment table whose // current shape was produced by two sequential merges. 
- let total_post_merges = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); - assert_total(&total_post_merges, 10, "post both merges, main must total 10 Persons"); + let total_post_merges = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); + assert_total( + &total_post_merges, + 10, + "post both merges, main must total 10 Persons", + ); // ───────────────────────────────────────────────────────────────── // Step 14: time-travel to pre-merge-a-version. Reads must return @@ -1021,14 +1008,9 @@ async fn composite_flow_multi_branch_sequential_merges() { // correctly to disk but the reopened catalog can't bind them. // ───────────────────────────────────────────────────────────────── let mut db = db; - let post_reopen_total = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); + let post_reopen_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); assert_total( &post_reopen_total, 10, diff --git a/crates/omnigraph/tests/consistency.rs b/crates/omnigraph/tests/consistency.rs index 63dc3f7..26517db 100644 --- a/crates/omnigraph/tests/consistency.rs +++ b/crates/omnigraph/tests/consistency.rs @@ -119,6 +119,187 @@ async fn load_merge_upserts_existing_and_inserts_new() { } } +/// Regression: two sequential `LoadMode::Merge` invocations against the +/// same set of keys must both succeed. Pre-fix, the second one failed +/// with `Ambiguous merge inserts are prohibited: multiple source rows +/// match the same target row on (id = "TEST-1")` even though every +/// source batch had one row per key. +/// +/// Triggered by Lance's `processed_row_ids: Mutex>` +/// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`) double- +/// processing the same source/target match against datasets previously +/// rewritten by merge_insert. 
Worked around by opting +/// `MergeInsertBuilder` into `SourceDedupeBehavior::FirstSeen` in +/// `crates/omnigraph/src/table_store.rs` — see that file for the full +/// rationale and the safety pin (`loader_rejects_intra_batch_duplicate_keys`). +/// Tracked at MR-957; upstream: lance-format/lance#6877. +#[tokio::test] +async fn load_merge_repeated_against_overlapping_keys_succeeds() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let schema = r#" +node Thing { + key: String @key + required_val: String + optional_val: String? +} +"#; + let mut db = Omnigraph::init(uri, schema).await.unwrap(); + + // Seed with 50 fully-populated rows (id + required + optional). + let mut seed = String::new(); + for i in 1..=50 { + seed.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}} +"#, + )); + } + load_jsonl(&mut db, &seed, LoadMode::Overwrite) + .await + .unwrap(); + + // Partial-schema delta — mirrors the bug report exactly: omits + // `optional_val`. 25 existing keys + 5 new keys, one row per key. + let mut delta = String::new(); + for i in (1..=25).chain(51..=55) { + delta.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}} +"#, + )); + } + + load_jsonl(&mut db, &delta, LoadMode::Merge) + .await + .expect("first merge must succeed"); + assert_eq!(count_rows(&db, "node:Thing").await, 55); + + load_jsonl(&mut db, &delta, LoadMode::Merge) + .await + .expect("second merge against same keys must succeed"); + assert_eq!(count_rows(&db, "node:Thing").await, 55); +} + +/// Safety pin for the `SourceDedupeBehavior::FirstSeen` workaround in +/// `crates/omnigraph/src/table_store.rs`. FirstSeen tells Lance to +/// silently skip a duplicate source row instead of erroring. 
Our use of +/// it depends on user-provided duplicates being rejected *before* the +/// batch reaches Lance — otherwise FirstSeen could silently drop user +/// data. +/// +/// Defense in depth: +/// 1. The loader's `enforce_unique_constraints_intra_batch` +/// (`loader/mod.rs:1453`), invoked unconditionally on any node type +/// with a `@key`, errors on intra-batch duplicate `@key` values at +/// intake — pinned by this test across every `LoadMode`. +/// 2. The `check_batch_unique_by_keys` precondition at the top of +/// `merge_insert_batch` and `stage_merge_insert` is the final +/// fail-fast guard: even if a future caller bypasses the loader path +/// (e.g. branch-merge's `publish_rewritten_merge_table` builds its +/// own source batch directly), a real duplicate id reaches Lance +/// only after surfacing as an `OmniError::Manifest`, never silently +/// via FirstSeen. Pinned by the unit tests in `table_store::tests`. +#[tokio::test] +async fn loader_rejects_intra_batch_duplicate_keys() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let schema = r#" +node Thing { + key: String @key + value: String +} +"#; + let mut db = Omnigraph::init(uri, schema).await.unwrap(); + + let dupes = r#"{"type":"Thing","data":{"key":"DUP","value":"first"}} +{"type":"Thing","data":{"key":"DUP","value":"second"}} +"#; + + for mode in [LoadMode::Overwrite, LoadMode::Append, LoadMode::Merge] { + let err = load_jsonl(&mut db, dupes, mode).await.unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("@unique violation") && msg.contains("DUP"), + "load mode {mode:?} must reject intra-batch duplicate @key (got: {msg})" + ); + assert_eq!( + count_rows(&db, "node:Thing").await, + 0, + "load mode {mode:?} must not persist any rows when the batch is rejected" + ); + } +} + +/// Canary for the upstream Lance gap that the `FirstSeen` workaround +/// in `table_store.rs` masks. 
The bug class is "Window 2": load → +/// indices built explicitly → merge → merge. Even with the engine +/// fully aligned to the "indexes are derived state" invariant +/// (MR-848), as long as an `id` index has been built between the +/// first and second merge_insert, the Lance internal that triggers +/// the bug remains reachable. +/// +/// This test runs the Window-2 sequence under the FirstSeen workaround. +/// It is expected to pass today. If a future Lance upgrade or local +/// change makes it START failing, the workaround has lost effectiveness +/// (upstream Lance changed something, or the FirstSeen setter was +/// dropped from `table_store.rs`). If a future Lance upgrade fixes the +/// bug class, this test continues to pass and the FirstSeen setter can +/// be retired. +/// +/// Tracked at MR-957; upstream: lance-format/lance#6877. +#[tokio::test] +async fn load_merge_window_2_documents_upstream_lance_gap() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let schema = r#" +node Thing { + key: String @key + required_val: String + optional_val: String? +} +"#; + let mut db = Omnigraph::init(uri, schema).await.unwrap(); + + let mut seed = String::new(); + for i in 1..=50 { + seed.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}} +"#, + )); + } + load_jsonl(&mut db, &seed, LoadMode::Overwrite) + .await + .unwrap(); + + // Explicit ensure_indices between seed and the merges — the Window + // 2 trigger. The eager-build behavior (MR-583) means the BTREE on + // `id` is already present here, but calling explicitly pins the + // invariant for the post-MR-848 future where the eager build is + // gone. 
+ db.ensure_indices().await.unwrap(); + + let mut delta = String::new(); + for i in (1..=25).chain(51..=55) { + delta.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}} +"#, + )); + } + + // Both merges must succeed under the FirstSeen workaround. + // `processed_row_ids` re-processes the same target row_id under + // the default `SourceDedupeBehavior::Fail`; FirstSeen tolerates it. + load_jsonl(&mut db, &delta, LoadMode::Merge) + .await + .expect("first merge after ensure_indices must succeed"); + db.ensure_indices().await.unwrap(); + load_jsonl(&mut db, &delta, LoadMode::Merge).await.expect( + "second merge after ensure_indices must succeed \ + (Window 2 canary: drop the FirstSeen setter in table_store.rs \ + only when this stays green WITHOUT it)", + ); + assert_eq!(count_rows(&db, "node:Thing").await, 55); +} + #[tokio::test] async fn cross_type_traversal_deduplicates_duplicate_edges() { let dir = tempfile::tempdir().unwrap(); @@ -163,7 +344,7 @@ async fn explicit_target_query_sees_other_writer_commits_without_refresh() { let uri = dir.path().to_str().unwrap(); - // Two independent handles to the same repo + // Two independent handles to the same graph let mut db1 = Omnigraph::open(uri).await.unwrap(); let mut db2 = Omnigraph::open(uri).await.unwrap(); diff --git a/crates/omnigraph/tests/end_to_end.rs b/crates/omnigraph/tests/end_to_end.rs index 6a41830..0d9e58e 100644 --- a/crates/omnigraph/tests/end_to_end.rs +++ b/crates/omnigraph/tests/end_to_end.rs @@ -1866,3 +1866,65 @@ async fn ensure_indices_does_not_error_on_repeated_call() { let ds = snap.open("node:Person").await.unwrap(); assert_eq!(ds.count_rows(None).await.unwrap(), 4); } + +// ─── DataFusion-Expr filter pushdown (Tier-1 follow-up to the Lance v6 bump) ── + +/// Regression for `CompOp::Contains` pushdown via `array_has` in +/// `ir_filter_to_expr`. 
Before the Expr-pushdown refactor, the +/// `ir_filter_to_sql` family returned `None` for list-contains (the +/// comment said *"Can't pushdown list contains"*) and the predicate was +/// applied post-scan in memory. With `Scanner::filter_expr(Expr)` and +/// DF's `array_has` builtin, the contains predicate now pushes down to +/// Lance — the test confirms results are correct AND the pushdown path +/// is exercised (a regression on the pushdown would land all rows in +/// the scan, then be filtered post-hoc; that still produces the right +/// count so this test pins correctness, while `lance_surface_guards.rs` +/// is the structural pin for the surface itself). +#[tokio::test] +async fn ir_filter_with_list_contains_pushes_down() { + let schema = r#" +node Doc { + slug: String @key + tags: [String] +} +"#; + let data = r#"{"type":"Doc","data":{"slug":"alpha","tags":["red","blue"]}} +{"type":"Doc","data":{"slug":"bravo","tags":["green"]}} +{"type":"Doc","data":{"slug":"charlie","tags":["red","green"]}} +{"type":"Doc","data":{"slug":"delta","tags":[]}}"#; + + let dir = tempfile::tempdir().unwrap(); + let mut db = Omnigraph::init(dir.path().to_str().unwrap(), schema) + .await + .unwrap(); + load_jsonl(&mut db, data, LoadMode::Overwrite) + .await + .unwrap(); + + let queries = r#" +query docs_with_tag($tag: String) { + match { + $d: Doc + $d.tags contains $tag + } + return { $d.slug } +} +"#; + let result = query_main(&mut db, queries, "docs_with_tag", ¶ms(&[("$tag", "red")])) + .await + .unwrap(); + + let batch = result.concat_batches().unwrap(); + let slugs = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut got: Vec<&str> = (0..slugs.len()).map(|i| slugs.value(i)).collect(); + got.sort(); + assert_eq!( + got, + vec!["alpha", "charlie"], + "contains-pushdown should return exactly the rows whose tags list contains 'red'" + ); +} diff --git a/crates/omnigraph/tests/failpoints.rs b/crates/omnigraph/tests/failpoints.rs index e8de05e..11cff73 100644 
--- a/crates/omnigraph/tests/failpoints.rs +++ b/crates/omnigraph/tests/failpoints.rs @@ -66,7 +66,7 @@ async fn graph_publish_failpoint_triggers_before_commit_append() { // Atomic schema apply: schema apply writes staging files first, then commits // the manifest, then renames staging → final. Tests below inject crashes at -// the two boundaries and assert that reopening the repo yields a consistent +// the two boundaries and assert that reopening the graph yields a consistent // state. #[tokio::test] @@ -303,14 +303,10 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() { let person_uri = node_table_uri(&uri, "Person"); { - let _pause_delete = ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause"); + let _pause_delete = + ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause"); let delete_params = helpers::params(&[("$name", "Alice")]); - let delete = db.mutate( - "main", - MUTATION_QUERIES, - "remove_person", - &delete_params, - ); + let delete = db.mutate("main", MUTATION_QUERIES, "remove_person", &delete_params); tokio::pin!(delete); let mut concurrent_update_succeeded = false; @@ -325,15 +321,18 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() { "set_age", &mixed_params(&[("$name", "Bob")], &[("$age", 26)]), ) - .await - .is_ok() + .await + .is_ok() { concurrent_update_succeeded = true; break; } tokio::time::sleep(std::time::Duration::from_millis(20)).await; } - assert!(concurrent_update_succeeded, "concurrent update must land while delete is paused"); + assert!( + concurrent_update_succeeded, + "concurrent update must land while delete is paused" + ); fail::remove("mutation.delete_node_pre_primary_delete"); let err = delete.await.unwrap_err(); @@ -464,7 +463,7 @@ async fn recovery_rolls_forward_load_on_feature_branch() { #[tokio::test] async fn recovery_rolls_forward_ensure_indices_on_feature_branch() { - use lance_index::DatasetIndexExt; + use lance::index::DatasetIndexExt; use 
omnigraph::loader::{LoadMode, load_jsonl}; use omnigraph::table_store::TableStore; @@ -925,13 +924,13 @@ async fn ensure_indices_stage_btree_failure_leaves_existing_tables_writable() { .expect("Person mutation must succeed after the failed schema apply — existing tables are not drifted"); } -fn assert_no_staging_files(repo: &std::path::Path) { +fn assert_no_staging_files(graph: &std::path::Path) { for name in [ "_schema.pg.staging", "_schema.ir.json.staging", "__schema_state.json.staging", ] { - let path = repo.join(name); + let path = graph.join(name); assert!( !path.exists(), "staging file {} still exists after recovery", @@ -1164,7 +1163,7 @@ edge WorksAt: Person -> Company // NEW schema (city column on Person, Tag node type) — not the old. // Without the schema-staging coordination, the schema-state // recovery would have deleted the staging files (because manifest - // hadn't advanced when it ran), leaving a corrupt repo with new- + // hadn't advanced when it ran), leaving a corrupt graph with new- // schema data on disk but old-schema catalog. let live_schema = std::fs::read_to_string(dir.path().join("_schema.pg")).unwrap(); assert!( diff --git a/crates/omnigraph/tests/helpers/mod.rs b/crates/omnigraph/tests/helpers/mod.rs index e7e1efb..c97ff72 100644 --- a/crates/omnigraph/tests/helpers/mod.rs +++ b/crates/omnigraph/tests/helpers/mod.rs @@ -44,7 +44,7 @@ query insert_person_and_friend($name: String, $age: I32, $friend: String) { } "#; -/// Init a repo and load the standard test data. +/// Init a graph and load the standard test data. 
pub async fn init_and_load(dir: &tempfile::TempDir) -> Omnigraph { let uri = dir.path().to_str().unwrap(); let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -249,7 +249,7 @@ pub fn vector_and_string_params( map } -pub fn s3_test_repo_uri(suite: &str) -> Option { +pub fn s3_test_graph_uri(suite: &str) -> Option { let bucket = std::env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?; let prefix = std::env::var("OMNIGRAPH_S3_TEST_PREFIX") .ok() diff --git a/crates/omnigraph/tests/helpers/recovery.rs b/crates/omnigraph/tests/helpers/recovery.rs index 3a8505f..c76009e 100644 --- a/crates/omnigraph/tests/helpers/recovery.rs +++ b/crates/omnigraph/tests/helpers/recovery.rs @@ -110,8 +110,8 @@ impl FollowUpMutation { } } -pub fn single_sidecar_operation_id(repo_root: &Path) -> String { - let ids = sidecar_operation_ids(repo_root); +pub fn single_sidecar_operation_id(graph_root: &Path) -> String { + let ids = sidecar_operation_ids(graph_root); assert_eq!( ids.len(), 1, @@ -121,8 +121,8 @@ pub fn single_sidecar_operation_id(repo_root: &Path) -> String { ids.into_iter().next().unwrap() } -pub fn sidecar_operation_ids(repo_root: &Path) -> Vec { - let dir = repo_root.join("__recovery"); +pub fn sidecar_operation_ids(graph_root: &Path) -> Vec { + let dir = graph_root.join("__recovery"); if !dir.exists() { return Vec::new(); } @@ -143,10 +143,10 @@ pub fn sidecar_operation_ids(repo_root: &Path) -> Vec { ids } -pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result { +pub async fn branch_head_commit_id(graph_root: &Path, branch: &str) -> Result { let graph = match branch { - "main" => CommitGraph::open(&repo_uri(repo_root)).await?, - branch => CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?, + "main" => CommitGraph::open(&graph_uri(graph_root)).await?, + branch => CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?, }; graph.head_commit_id().await?.ok_or_else(|| { OmniError::manifest_internal(format!("commit graph for 
branch {branch} has no head")) @@ -154,52 +154,52 @@ pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result Result<()> { match expectation { RecoveryExpectation::RolledForward { tables } => { - assert_sidecar_absent(repo_root, operation_id); - let audit = read_audit_row(repo_root, operation_id).await?; + assert_sidecar_absent(graph_root, operation_id); + let audit = read_audit_row(graph_root, operation_id).await?; assert_eq!( audit.recovery_kind, "RolledForward", "audit row for {operation_id} recorded the wrong recovery_kind", ); - assert_manifest_pins_match_lance_heads(repo_root, &tables).await?; - assert_audit_to_versions_match_lance_heads(repo_root, &audit, &tables).await?; - assert_recovery_commit_shape(repo_root, &audit, &tables).await?; - assert_non_main_did_not_move_main(repo_root, &tables).await?; - assert_idempotent_reopen(repo_root, operation_id).await?; - run_follow_up_mutations(repo_root, tables).await?; + assert_manifest_pins_match_lance_heads(graph_root, &tables).await?; + assert_audit_to_versions_match_lance_heads(graph_root, &audit, &tables).await?; + assert_recovery_commit_shape(graph_root, &audit, &tables).await?; + assert_non_main_did_not_move_main(graph_root, &tables).await?; + assert_idempotent_reopen(graph_root, operation_id).await?; + run_follow_up_mutations(graph_root, tables).await?; } RecoveryExpectation::RolledBack { tables } => { - assert_sidecar_absent(repo_root, operation_id); - let audit = read_audit_row(repo_root, operation_id).await?; + assert_sidecar_absent(graph_root, operation_id); + let audit = read_audit_row(graph_root, operation_id).await?; assert_eq!( audit.recovery_kind, "RolledBack", "audit row for {operation_id} recorded the wrong recovery_kind", ); assert_rollback_outcomes_record_drift(&audit); - assert_recovery_commit_shape(repo_root, &audit, &tables).await?; - assert_non_main_did_not_move_main(repo_root, &tables).await?; - assert_idempotent_reopen(repo_root, operation_id).await?; - 
run_follow_up_mutations(repo_root, tables).await?; + assert_recovery_commit_shape(graph_root, &audit, &tables).await?; + assert_non_main_did_not_move_main(graph_root, &tables).await?; + assert_idempotent_reopen(graph_root, operation_id).await?; + run_follow_up_mutations(graph_root, tables).await?; } RecoveryExpectation::Deferred => { assert!( - sidecar_path(repo_root, operation_id).exists(), + sidecar_path(graph_root, operation_id).exists(), "deferred recovery must leave sidecar {operation_id} on disk", ); assert!( - read_audit_row(repo_root, operation_id).await.is_err(), + read_audit_row(graph_root, operation_id).await.is_err(), "deferred recovery must not record an audit row for {operation_id}", ); } RecoveryExpectation::NoOp => { - assert_sidecar_absent(repo_root, operation_id); + assert_sidecar_absent(graph_root, operation_id); assert!( - read_audit_row(repo_root, operation_id).await.is_err(), + read_audit_row(graph_root, operation_id).await.is_err(), "no-op recovery must not record an audit row for {operation_id}", ); } @@ -216,24 +216,24 @@ fn branch_context(tables: &[TableExpectation]) -> Option { .map(str::to_string) } -fn sidecar_path(repo_root: &Path, operation_id: &str) -> PathBuf { - repo_root +fn sidecar_path(graph_root: &Path, operation_id: &str) -> PathBuf { + graph_root .join("__recovery") .join(format!("{operation_id}.json")) } -fn assert_sidecar_absent(repo_root: &Path, operation_id: &str) { +fn assert_sidecar_absent(graph_root: &Path, operation_id: &str) { assert!( - !sidecar_path(repo_root, operation_id).exists(), + !sidecar_path(graph_root, operation_id).exists(), "recovery sidecar {operation_id} must be deleted after successful recovery", ); } async fn assert_manifest_pins_match_lance_heads( - repo_root: &Path, + graph_root: &Path, tables: &[TableExpectation], ) -> Result<()> { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let db = Omnigraph::open(&uri).await?; for table in tables { let (entry, lance_head) = 
entry_and_lance_head(&db, &uri, table).await?; @@ -254,11 +254,11 @@ async fn assert_manifest_pins_match_lance_heads( } async fn assert_audit_to_versions_match_lance_heads( - repo_root: &Path, + graph_root: &Path, audit: &RecoveryAuditRow, tables: &[TableExpectation], ) -> Result<()> { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let db = Omnigraph::open(&uri).await?; for table in tables { let (_, lance_head) = entry_and_lance_head(&db, &uri, table).await?; @@ -301,10 +301,10 @@ fn assert_rollback_outcomes_record_drift(audit: &RecoveryAuditRow) { } async fn assert_non_main_did_not_move_main( - repo_root: &Path, + graph_root: &Path, tables: &[TableExpectation], ) -> Result<()> { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let db = Omnigraph::open(&uri).await?; let main = db.snapshot_of(ReadTarget::branch("main")).await?; for table in tables { @@ -327,14 +327,14 @@ async fn assert_non_main_did_not_move_main( } async fn assert_recovery_commit_shape( - repo_root: &Path, + graph_root: &Path, audit: &RecoveryAuditRow, tables: &[TableExpectation], ) -> Result<()> { let branch = branch_context(tables); let expected_parent = expected_recovery_parent(tables)?; let branch = branch.as_deref(); - let commit = read_recovery_commit(repo_root, audit, branch).await?; + let commit = read_recovery_commit(graph_root, audit, branch).await?; assert_eq!( commit.actor_id.as_deref(), @@ -362,7 +362,7 @@ async fn assert_recovery_commit_shape( ); if let Some(branch) = branch { - let graph = CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?; + let graph = CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?; let commits = graph.load_commits().await?; let parent = commit.parent_commit_id.as_deref().ok_or_else(|| { OmniError::manifest_internal(format!( @@ -403,12 +403,12 @@ fn expected_recovery_parent(tables: &[TableExpectation]) -> Result Result<()> { - let before = matching_audit_rows(repo_root, operation_id).await?; 
- let uri = repo_uri(repo_root); +async fn assert_idempotent_reopen(graph_root: &Path, operation_id: &str) -> Result<()> { + let before = matching_audit_rows(graph_root, operation_id).await?; + let uri = graph_uri(graph_root); let _db = Omnigraph::open(&uri).await?; - assert_sidecar_absent(repo_root, operation_id); - let after = matching_audit_rows(repo_root, operation_id).await?; + assert_sidecar_absent(graph_root, operation_id); + let after = matching_audit_rows(graph_root, operation_id).await?; assert_eq!( after.len(), before.len(), @@ -417,14 +417,14 @@ async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Resul Ok(()) } -async fn run_follow_up_mutations(repo_root: &Path, tables: Vec) -> Result<()> { +async fn run_follow_up_mutations(graph_root: &Path, tables: Vec) -> Result<()> { let mut db: Option = None; for table in tables { let Some(mutation) = table.follow_up_mutation else { continue; }; if db.is_none() { - db = Some(Omnigraph::open(&repo_uri(repo_root)).await?); + db = Some(Omnigraph::open(&graph_uri(graph_root)).await?); } let db = db.as_mut().unwrap(); db.mutate( @@ -480,11 +480,11 @@ async fn lance_head_for_entry(root_uri: &str, entry: &SubTableEntry) -> Result, ) -> Result { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let graph = match branch { Some(branch) => CommitGraph::open_at_branch(&uri, branch).await?, None => CommitGraph::open(&uri).await?, @@ -502,8 +502,8 @@ async fn read_recovery_commit( }) } -async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result { - let mut rows = matching_audit_rows(repo_root, operation_id).await?; +async fn read_audit_row(graph_root: &Path, operation_id: &str) -> Result { + let mut rows = matching_audit_rows(graph_root, operation_id).await?; if rows.len() != 1 { return Err(OmniError::manifest_internal(format!( "expected exactly one recovery audit row for {operation_id}, got {}", @@ -514,10 +514,10 @@ async fn read_audit_row(repo_root: &Path, operation_id: 
&str) -> Result Result> { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return Ok(Vec::new()); } @@ -575,6 +575,6 @@ fn string_column<'a>(batch: &'a RecordBatch, name: &str) -> Result<&'a StringArr }) } -fn repo_uri(repo_root: &Path) -> String { - repo_root.to_str().unwrap().to_string() +fn graph_uri(graph_root: &Path) -> String { + graph_root.to_str().unwrap().to_string() } diff --git a/crates/omnigraph/tests/lance_surface_guards.rs b/crates/omnigraph/tests/lance_surface_guards.rs new file mode 100644 index 0000000..b65a808 --- /dev/null +++ b/crates/omnigraph/tests/lance_surface_guards.rs @@ -0,0 +1,244 @@ +//! Lance API surface guards. +//! +//! Each guard pins a Lance API surface that OmniGraph relies on. If a future +//! Lance bump silently renames a variant, restructures a public struct, or +//! flips a method to async, the corresponding guard either fails to compile +//! (compile-time guards) or fails at runtime (runtime guards). The purpose +//! is to turn silent-break risks into red CI bars on the *next* Lance bump, +//! rather than into wrong-state recovery in production. +//! +//! Pair this file with `docs/dev/lance.md`'s alignment audit stanza: any +//! Lance bump runs `cargo test -p omnigraph-engine --test lance_surface_guards` +//! first as the smoke check. +//! +//! ## Compile-only guards +//! +//! Functions prefixed with `_compile_` are gated with a broad `#[allow(...)]` +//! and never called. They exist to make `cargo build -p omnigraph-engine --tests` +//! enforce the API shape. Using `unimplemented!()` as a placeholder lets type +//! inference proceed without running anything. +//! +//! ## Runtime guards +//! +//! Functions decorated `#[tokio::test]` actually run; they construct real +//! values and assert field shapes / types. 
+ +use std::sync::Arc; + +use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray}; +use arrow_schema::{DataType, Field, Schema}; +use lance::Dataset; +use lance::dataset::builder::DatasetBuilder; +use lance::dataset::optimize::{CompactionOptions, compact_files}; +use lance::dataset::write::delete::DeleteResult; +use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams}; +use lance_file::version::LanceFileVersion; +use lance_namespace::LanceNamespace; +use lance_table::io::commit::ManifestNamingScheme; + +/// Helper: build a small fresh dataset in a tempdir. Pinned at V2_2 to match +/// production write paths (blob v2 requires V2_2; see `docs/dev/lance.md`). +async fn fresh_dataset(uri: &str) -> Dataset { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("value", DataType::Int32, false), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(vec!["alice", "bob"])), + Arc::new(Int32Array::from(vec![1, 2])), + ], + ) + .unwrap(); + let reader = RecordBatchIterator::new(vec![Ok(batch)], schema); + let params = WriteParams { + mode: WriteMode::Create, + enable_stable_row_ids: true, + data_storage_version: Some(LanceFileVersion::V2_2), + ..Default::default() + }; + Dataset::write(reader, uri, Some(params)).await.unwrap() +} + +// --- Guard 1: LanceError::TooMuchWriteContention variant exists ------------ +// +// `db/manifest/publisher.rs::map_lance_publish_error` pattern-matches on this +// variant to surface typed `OmniError::ManifestRowLevelCasContention`. If +// Lance renames the variant or removes the builder, this guard fails. + +#[tokio::test] +async fn lance_error_too_much_write_contention_variant_exists() { + let err = lance::Error::too_much_write_contention("guard"); + assert!( + matches!(err, lance::Error::TooMuchWriteContention { .. 
}), + "Lance::Error::TooMuchWriteContention variant missing or renamed; \ + update db/manifest/publisher.rs::map_lance_publish_error and \ + this guard, then re-pin docs/dev/lance.md." + ); +} + +// --- Guard 2: ManifestLocation field shape --------------------------------- +// +// `db/manifest/metadata.rs:84-88` reads `.path`, `.size`, `.e_tag`, +// `.naming_scheme` off `dataset.manifest_location()`. If any field renames +// or changes type, this guard fails to compile. + +#[tokio::test] +async fn manifest_location_field_shape() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().join("guard.lance"); + let ds = fresh_dataset(uri.to_str().unwrap()).await; + + let loc = ds.manifest_location(); + // Explicit type bindings — these are the load-bearing assertions. If a + // type drifts (e.g. .size: Option → .size: u64), this fails to + // compile. + let _path: &object_store::path::Path = &loc.path; + let _size: Option = loc.size; + let _e_tag: Option = loc.e_tag.clone(); + let _scheme: ManifestNamingScheme = loc.naming_scheme; + // Runtime sanity — naming_scheme should produce a Debug string we use + // verbatim in `TableVersionMetadata::naming_scheme`. + assert!(!format!("{:?}", loc.naming_scheme).is_empty()); +} + +// --- Guard 3: checkout_version + restore async chain ----------------------- +// +// `db/manifest/recovery.rs:505-522` chains `Dataset::open(...).await? +// .checkout_version(N).await?.restore().await?` as the recovery rollback +// hammer. Compile-only — never runs. + +#[allow( + dead_code, + unreachable_code, + unused_variables, + unused_mut, + clippy::diverging_sub_expression +)] +async fn _compile_checkout_version_then_restore_signature() -> lance::Result<()> { + let ds: Dataset = unimplemented!(); + let mut ds: Dataset = ds.checkout_version(1u64).await?; + // `restore()` takes `&mut self` and returns `Result<()>`; the dataset + // mutates in place. 
If Lance flips this to return a fresh `Dataset` + // (consuming `self`), this guard fails to compile. + let _: () = ds.restore().await?; + Ok(()) +} + +// --- Guard 4: DatasetBuilder::from_namespace fluent chain ------------------ +// +// `db/manifest/namespace.rs:162-174` chains +// `DatasetBuilder::from_namespace(ns, vec![id]).await?.with_branch(...).with_version(...).load().await?`. +// Compile-only. + +#[allow( + dead_code, + unreachable_code, + unused_variables, + unused_mut, + clippy::diverging_sub_expression +)] +async fn _compile_dataset_builder_from_namespace_signature( + ns: Arc, +) -> lance::Result<()> { + let builder: DatasetBuilder = + DatasetBuilder::from_namespace(ns, vec!["table".to_string()]).await?; + let builder: DatasetBuilder = builder.with_branch("b", None); + let builder: DatasetBuilder = builder.with_version(1u64); + let _ds: Dataset = builder.load().await?; + Ok(()) +} + +// --- Guard 5: MergeInsertBuilder fluent chain ------------------------------ +// +// `db/manifest/publisher.rs:370-391` is the manifest CAS. If any method on +// the builder renames or changes signature, the publisher silently breaks. +// Compile-only. + +#[allow( + dead_code, + unreachable_code, + unused_variables, + unused_mut, + clippy::diverging_sub_expression +)] +async fn _compile_merge_insert_builder_method_chain() -> lance::Result<()> { + use lance::dataset::MergeStats; + + let ds: Arc = unimplemented!(); + let job = MergeInsertBuilder::try_new(ds, vec!["object_id".to_string()])? + .when_matched(WhenMatched::UpdateAll) + .when_not_matched(WhenNotMatched::InsertAll) + .conflict_retries(0) + .use_index(false) + .try_build()?; + + // execute_reader takes `impl StreamingWriteSource` (lance trait), which + // RecordBatchIterator implements. Pin the return shape + // `(Arc, MergeStats)` — the publisher's CAS loop depends on + // both: the new Dataset to advance HEAD, the stats for the audit row. 
+ let source: RecordBatchIterator>> = + unimplemented!(); + let result: (Arc, MergeStats) = job.execute_reader(source).await?; + let _ds: Arc = result.0; + let _stats: MergeStats = result.1; + Ok(()) +} + +// --- Guard 6: WriteParams::default() leaves data_storage_version = None ---- +// +// Our V2_2 pin is load-bearing for blob v2 (verified earlier this session +// when V2_1 produced "Blob v2 requires file version >= 2.2" on 13 blob +// tests). If Lance changes the default to pin some version itself, audit +// every `data_storage_version: Some(LanceFileVersion::V2_2)` site. + +#[test] +fn write_params_default_does_not_set_storage_version() { + let params = WriteParams::default(); + assert_eq!( + params.data_storage_version, None, + "WriteParams::default().data_storage_version is no longer None; \ + audit every explicit V2_2 pin (see rg 'LanceFileVersion::V2_2')." + ); +} + +// --- Guard 7: compact_files signature -------------------------------------- +// +// `db/omnigraph/optimize.rs:107` calls `compact_files(&mut ds, options, None)`. +// Compile-only. + +#[allow( + dead_code, + unreachable_code, + unused_variables, + unused_mut, + clippy::diverging_sub_expression +)] +async fn _compile_compact_files_signature() -> lance::Result<()> { + let mut ds: Dataset = unimplemented!(); + let options: CompactionOptions = CompactionOptions::default(); + let _metrics = compact_files(&mut ds, options, None).await?; + Ok(()) +} + +// --- Guard 8: Dataset::delete returns DeleteResult { new_dataset, num_deleted_rows } --- +// +// `table_store.rs::delete_where` consumes both fields. When MR-A migrates +// `delete_where` to two-phase via `DeleteBuilder::execute_uncommitted`, this +// guard updates to pin the staged path. Compile-only. 
+ +#[allow( + dead_code, + unreachable_code, + unused_variables, + unused_mut, + clippy::diverging_sub_expression +)] +async fn _compile_delete_result_field_shape() -> lance::Result<()> { + let mut ds: Dataset = unimplemented!(); + let result: DeleteResult = ds.delete("x = 1").await?; + let _new_dataset: Arc = result.new_dataset; + let _num_deleted: u64 = result.num_deleted_rows; + Ok(()) +} diff --git a/crates/omnigraph/tests/lifecycle.rs b/crates/omnigraph/tests/lifecycle.rs index d555cbe..e59dbaa 100644 --- a/crates/omnigraph/tests/lifecycle.rs +++ b/crates/omnigraph/tests/lifecycle.rs @@ -3,13 +3,13 @@ mod helpers; use std::fs; use omnigraph::db::{Omnigraph, ReadTarget}; -use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json}; use omnigraph_compiler::schema::parser::parse_schema; +use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json}; use helpers::*; #[tokio::test] -async fn init_creates_repo() { +async fn init_creates_graph() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); @@ -34,7 +34,7 @@ async fn init_creates_repo() { } #[tokio::test] -async fn open_reads_existing_repo() { +async fn open_reads_existing_graph() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); @@ -49,7 +49,7 @@ async fn open_reads_existing_repo() { } #[tokio::test] -async fn open_bootstraps_legacy_schema_state_for_main_only_repo() { +async fn open_bootstraps_legacy_schema_state_for_main_only_graph() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -64,7 +64,7 @@ async fn open_bootstraps_legacy_schema_state_for_main_only_repo() { } #[tokio::test] -async fn open_rejects_legacy_repo_with_public_branch() { +async fn open_rejects_legacy_graph_with_public_branch() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -74,7 +74,7 @@ 
async fn open_rejects_legacy_repo_with_public_branch() { fs::remove_file(dir.path().join("__schema_state.json")).unwrap(); let err = match Omnigraph::open(uri).await { - Ok(_) => panic!("expected legacy repo with public branch to fail schema bootstrap"), + Ok(_) => panic!("expected legacy graph with public branch to fail schema bootstrap"), Err(err) => err, }; assert!( diff --git a/crates/omnigraph/tests/maintenance.rs b/crates/omnigraph/tests/maintenance.rs index 6bb81f2..3c6ab30 100644 --- a/crates/omnigraph/tests/maintenance.rs +++ b/crates/omnigraph/tests/maintenance.rs @@ -1,6 +1,6 @@ // Maintenance tests: `optimize` (Lance compact_files) and `cleanup` // (Lance cleanup_old_versions) at the graph level. Covers no-op edges -// (empty repo, already-optimized repo), the policy-validation contract on +// (empty graph, already-optimized graph), the policy-validation contract on // `cleanup`, and the keep-versions cap that protects head. mod helpers; @@ -13,7 +13,7 @@ use omnigraph::loader::{LoadMode, load_jsonl}; use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load}; #[tokio::test] -async fn optimize_on_empty_repo_returns_stats_per_table_with_no_changes() { +async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -37,7 +37,7 @@ async fn optimize_after_load_then_again_is_idempotent() { // First pass may compact (load wrote real fragments). let _first = db.optimize().await.unwrap(); - // Second pass should be a no-op: already-compacted repo produces no + // Second pass should be a no-op: already-compacted graph produces no // fragments_removed / fragments_added. 
let second = db.optimize().await.unwrap(); for s in &second { @@ -119,7 +119,9 @@ async fn cleanup_older_than_zero_preserves_head() { // Smoke test: after aggressive cleanup, we can still read and write the // graph — head wasn't pruned. - load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Merge) + .await + .unwrap(); } #[tokio::test] @@ -151,6 +153,8 @@ async fn cleanup_then_optimize_preserves_rows_and_table_remains_writable() { assert_eq!(count_rows(&db, "node:Company").await, companies_before); // Table is still writable after the cleanup+optimize sequence. - load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Merge) + .await + .unwrap(); assert_eq!(count_rows(&db, "node:Person").await, people_before); } diff --git a/crates/omnigraph/tests/recovery.rs b/crates/omnigraph/tests/recovery.rs index 5ad87e8..a090178 100644 --- a/crates/omnigraph/tests/recovery.rs +++ b/crates/omnigraph/tests/recovery.rs @@ -22,16 +22,16 @@ use helpers::recovery::{RecoveryExpectation, TableExpectation, assert_post_recov const TEST_SCHEMA: &str = include_str!("fixtures/test.pg"); -fn write_sidecar_file(repo_root: &Path, operation_id: &str, json: &str) { - let dir = repo_root.join("__recovery"); +fn write_sidecar_file(graph_root: &Path, operation_id: &str, json: &str) { + let dir = graph_root.join("__recovery"); if !dir.exists() { std::fs::create_dir(&dir).unwrap(); } std::fs::write(dir.join(format!("{}.json", operation_id)), json).unwrap(); } -fn list_recovery_dir(repo_root: &Path) -> Vec { - let dir = repo_root.join("__recovery"); +fn list_recovery_dir(graph_root: &Path) -> Vec { + let dir = graph_root.join("__recovery"); if !dir.exists() { return Vec::new(); } @@ -41,7 +41,7 @@ fn list_recovery_dir(repo_root: &Path) -> Vec { .collect() } -/// Full URI of a node-type Lance dataset under a fresh Omnigraph repo. 
+/// Full URI of a node-type Lance dataset under a fresh Omnigraph graph. /// Mirrors the `nodes/{fnv1a64-hex(type_name)}` layout in `db/manifest/layout.rs`. fn node_table_uri(root: &str, type_name: &str) -> String { let h: u64 = fnv1a64(type_name.as_bytes()); @@ -283,8 +283,8 @@ async fn recovery_rolls_back_synthetic_drift_on_open() { // ===================================================================== /// Helper: count rows in `_graph_commit_recoveries.lance` at the given root. -async fn count_recovery_audit_rows(repo_root: &Path) -> usize { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); +async fn count_recovery_audit_rows(graph_root: &Path) -> usize { + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return 0; } @@ -306,9 +306,9 @@ async fn count_recovery_audit_rows(repo_root: &Path) -> usize { /// Helper: read the most recent recovery audit row's `recovery_kind`, /// `recovery_for_actor`, and `operation_id`. Returns `None` if no rows. async fn read_latest_recovery_audit( - repo_root: &Path, + graph_root: &Path, ) -> Option<(String, Option, String, String)> { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return None; } @@ -357,8 +357,8 @@ async fn read_latest_recovery_audit( /// storage order (multiple batches concatenated). Used by the /// multi-sidecar fresh-snapshot test as a diagnostic alongside the /// post-recovery Lance HEAD assertion. 
-async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); +async fn list_recovery_audit_kinds(graph_root: &Path) -> Vec { + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return Vec::new(); } @@ -391,8 +391,8 @@ async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec { } /// Helper: count `_graph_commits.lance` rows tagged with the recovery actor. -async fn count_recovery_actor_commits(repo_root: &Path) -> usize { - let actors_dir = repo_root.join("_graph_commit_actors.lance"); +async fn count_recovery_actor_commits(graph_root: &Path) -> usize { + let actors_dir = graph_root.join("_graph_commit_actors.lance"); if !actors_dir.exists() { return 0; } @@ -908,7 +908,7 @@ async fn recovery_ensure_indices_steady_state_no_sidecar() { /// ran) and rolls back any sibling table's legitimate index work. /// /// Integration verification: after a real init + ensure_indices on a -/// repo where every table is empty, the recovery sweep must complete +/// graph where every table is empty, the recovery sweep must complete /// cleanly (no leftover sidecar) AND the next ensure_indices must also /// leave no sidecar — proving the empty-table-scoping behavior lets /// steady-state runs incur zero sidecar I/O. The @@ -930,7 +930,7 @@ async fn recovery_ensure_indices_handles_empty_tables() { db.ensure_indices().await.unwrap(); assert!( list_recovery_dir(dir.path()).is_empty(), - "ensure_indices on an all-empty repo must not leave a sidecar" + "ensure_indices on an all-empty graph must not leave a sidecar" ); // Reopen + ensure_indices — still steady state, still no sidecar. 
drop(db); @@ -938,7 +938,7 @@ async fn recovery_ensure_indices_handles_empty_tables() { db.ensure_indices().await.unwrap(); assert!( list_recovery_dir(dir.path()).is_empty(), - "second ensure_indices on an all-empty repo must also not leave a sidecar" + "second ensure_indices on an all-empty graph must also not leave a sidecar" ); } diff --git a/crates/omnigraph/tests/runs.rs b/crates/omnigraph/tests/runs.rs index 4e363bf..f2d7dc3 100644 --- a/crates/omnigraph/tests/runs.rs +++ b/crates/omnigraph/tests/runs.rs @@ -521,6 +521,10 @@ query delete_two_persons($first: String, $second: String) { delete Person where name = $first delete Person where name = $second } + +query update_age_by_name($name: String, $age: I32) { + update Person set { age: $age } where name = $name +} "#; /// D₂: a query mixing inserts/updates with deletes is rejected at parse @@ -1362,3 +1366,85 @@ query insert_then_update_note( .unwrap(); assert_eq!(qr.num_rows(), 0, "letter must not be visible after early error"); } + +/// MR-920 regression: two sequential `update T set {f:v} where x=y` +/// invocations against the same row must both succeed. Pre-fix, the +/// second one failed with `Ambiguous merge inserts are prohibited: +/// multiple source rows match the same target row on (id = "Alice")` +/// even though the scan returned exactly one row. +/// +/// Root cause hypothesis (per MR-920): Lance's +/// `processed_row_ids: Mutex>` +/// (`src/dataset/write/merge_insert.rs:2099`) double-processes the +/// same target row_id against datasets previously rewritten by +/// merge_insert. `SourceDedupeBehavior::FirstSeen` makes Lance skip +/// rather than error. +/// +/// Companion to `consistency.rs::load_merge_repeated_against_overlapping_keys_succeeds` +/// (PR #98 / Window 1 of the bug class via the load surface). 
+#[tokio::test] +async fn second_sequential_update_on_same_row_succeeds() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + + db.mutate( + "main", + STAGED_QUERIES, + "update_age_by_name", + &mixed_params(&[("$name", "Alice")], &[("$age", 99)]), + ) + .await + .expect("first sequential update on Alice must succeed"); + + let batches = read_table(&db, "node:Person").await; + let alice_count: usize = batches + .iter() + .map(|b| { + let names = b + .column_by_name("name") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + (0..b.num_rows()) + .filter(|i| names.is_valid(*i) && names.value(*i) == "Alice") + .count() + }) + .sum(); + assert_eq!( + alice_count, 1, + "after first update, exactly one Alice row should be visible" + ); + + db.mutate( + "main", + STAGED_QUERIES, + "update_age_by_name", + &mixed_params(&[("$name", "Alice")], &[("$age", 42)]), + ) + .await + .expect("second sequential update on Alice must succeed"); + + let batches = read_table(&db, "node:Person").await; + let mut alice_age: Option = None; + for batch in &batches { + let names = batch + .column_by_name("name") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + let ages = batch + .column_by_name("age") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + if names.is_valid(i) && names.value(i) == "Alice" && ages.is_valid(i) { + alice_age = Some(ages.value(i)); + } + } + } + assert_eq!(alice_age, Some(42), "Alice's age must reflect the second update"); +} diff --git a/crates/omnigraph/tests/s3_storage.rs b/crates/omnigraph/tests/s3_storage.rs index 5b90022..7e4f0a3 100644 --- a/crates/omnigraph/tests/s3_storage.rs +++ b/crates/omnigraph/tests/s3_storage.rs @@ -7,8 +7,8 @@ use omnigraph::loader::{LoadMode, load_jsonl}; use helpers::*; #[tokio::test(flavor = "multi_thread")] -async fn s3_compatible_repo_lifecycle_works() { - let Some(uri) = s3_test_repo_uri("omnigraph-runtime") else { +async fn 
s3_compatible_graph_lifecycle_works() { + let Some(uri) = s3_test_graph_uri("omnigraph-runtime") else { eprintln!("skipping s3 runtime test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -81,7 +81,7 @@ async fn s3_compatible_repo_lifecycle_works() { #[tokio::test(flavor = "multi_thread")] async fn s3_branch_change_merge_flow_works() { - let Some(uri) = s3_test_repo_uri("omnigraph-branching") else { + let Some(uri) = s3_test_graph_uri("omnigraph-branching") else { eprintln!("skipping s3 branch test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -135,7 +135,7 @@ async fn s3_branch_change_merge_flow_works() { #[tokio::test(flavor = "multi_thread")] async fn s3_public_load_uses_hidden_run_and_publishes() { - let Some(uri) = s3_test_repo_uri("omnigraph-public-load") else { + let Some(uri) = s3_test_graph_uri("omnigraph-public-load") else { eprintln!("skipping s3 public load test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; diff --git a/crates/omnigraph/tests/schema_apply.rs b/crates/omnigraph/tests/schema_apply.rs index 6862c84..cc0cae2 100644 --- a/crates/omnigraph/tests/schema_apply.rs +++ b/crates/omnigraph/tests/schema_apply.rs @@ -74,7 +74,7 @@ async fn apply_schema_rejects_when_non_main_branch_exists() { let err = db.apply_schema(&desired).await.unwrap_err(); assert!( err.to_string() - .contains("schema apply requires a repo with only main") + .contains("schema apply requires a graph with only main") ); } @@ -402,10 +402,7 @@ async fn apply_schema_rejects_adding_a_required_property_without_backfill() { // Add `email: String` (required, non-nullable, no @rename_from). Existing // rows have no value to fill in, so this is unsupported in v1. 
- let desired = TEST_SCHEMA.replace( - " age: I32?\n}", - " age: I32?\n email: String\n}", - ); + let desired = TEST_SCHEMA.replace(" age: I32?\n}", " age: I32?\n email: String\n}"); let err = db.apply_schema(&desired).await.unwrap_err(); let msg = err.to_string(); assert!( @@ -437,7 +434,10 @@ async fn plan_schema_for_property_type_narrowing_is_not_supported() { .unwrap(); let plan = db.plan_schema(TEST_SCHEMA).await.unwrap(); - assert!(!plan.supported, "narrowing I64 -> I32 must not be supported"); + assert!( + !plan.supported, + "narrowing I64 -> I32 must not be supported" + ); assert!(plan.steps.iter().any(|step| matches!( step, SchemaMigrationStep::UnsupportedChange { code, .. } diff --git a/crates/omnigraph/tests/search.rs b/crates/omnigraph/tests/search.rs index a611a0f..c4454cf 100644 --- a/crates/omnigraph/tests/search.rs +++ b/crates/omnigraph/tests/search.rs @@ -3,7 +3,8 @@ mod helpers; use std::env; use arrow_array::{Array, StringArray}; -use lance_index::{DatasetIndexExt, is_system_index}; +use lance::index::DatasetIndexExt; +use lance_index::is_system_index; use serial_test::serial; use omnigraph::db::Omnigraph; diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md index 16cda04..8b7fca2 100644 --- a/docs/dev/architecture.md +++ b/docs/dev/architecture.md @@ -10,7 +10,7 @@ Three views, increasing zoom: 2. **Layer view** — the eight-layer stack inside one OmniGraph process. 3. **Component zoom-ins** — what's inside each layer. -For runtime flows (read query, mutation), see [`docs/dev/execution.md`](execution.md). For the on-disk layout of a repo, see [`docs/user/storage.md`](../user/storage.md). +For runtime flows (read query, mutation), see [`docs/dev/execution.md`](execution.md). For the on-disk layout of a graph, see [`docs/user/storage.md`](../user/storage.md). L1 (orange in the diagrams) is what we inherit from Lance; L2 (blue) is what OmniGraph adds. The L1/L2 framing is also called out in prose at the bottom of this doc. 
@@ -63,7 +63,7 @@ flowchart TB subgraph engine[omnigraph engine] plan[exec query and mutation]:::l2 gi[graph index CSR/CSC
RuntimeCache LRU 8]:::l2 - coord[coordinator
ManifestRepo · CommitGraph]:::l2 + coord[coordinator
ManifestCoordinator · CommitGraph]:::l2 end subgraph storage[storage trait — wraps Lance] @@ -132,7 +132,7 @@ flowchart TB subgraph state[graph state] coord[GraphCoordinator]:::l2 - mr[ManifestRepo
db/manifest.rs]:::l2 + mr[ManifestCoordinator
db/manifest.rs]:::l2 cg[CommitGraph
_graph_commits.lance]:::l2 stg[MutationStaging
per-query in-memory accumulator
exec/staging.rs]:::l2 end @@ -166,7 +166,7 @@ Code paths: - Read entry: `Omnigraph::query` at `crates/omnigraph/src/exec/query.rs:7` - Mutation entry: `Omnigraph::mutate` at `crates/omnigraph/src/exec/mutation.rs:511` -- Manifest commit: `ManifestRepo::commit` at `crates/omnigraph/src/db/manifest.rs:280` +- Manifest commit: `ManifestCoordinator::commit` at `crates/omnigraph/src/db/manifest.rs:280` - Graph index: `crates/omnigraph/src/graph_index/` - Loader: `Omnigraph::ingest` at `crates/omnigraph/src/loader/mod.rs:74` diff --git a/docs/dev/branch-protection.md b/docs/dev/branch-protection.md index d1225dc..9b2fa78 100644 --- a/docs/dev/branch-protection.md +++ b/docs/dev/branch-protection.md @@ -16,12 +16,12 @@ This page explains what the policy says and how to change it. | **Disallow force pushes** | `true` | No history rewrites on `main`. | | **Disallow branch deletions** | `true` | `main` cannot be deleted. | | **Required conversation resolution** | `true` | All review comment threads must be resolved before merge. | -| **Enforce on admins** | `true` | Even repo admins go through the gates. The point is no bypasses. | +| **Enforce on admins** | `true` | Even repository admins go through the gates. The point is no bypasses. | | **Required signed commits** | not yet | Not enabled. Would lock out maintainers until everyone enrolls GPG/SSH commit signing. Tracked as a follow-up. | ## How to apply -Run from the repo root: +Run from the repository root: ```bash ./scripts/apply-branch-protection.sh @@ -29,7 +29,7 @@ Run from the repo root: The script reads `.github/branch-protection.json`, strips the human-readable `_comment` field (the GitHub API rejects unknown keys), and PUTs to `repos/ModernRelay/omnigraph/branches/main/protection`. -Requires `gh` authenticated with a token that has admin permissions on the repo. +Requires `gh` authenticated with a token that has admin permissions on the repository. 
To preview without applying: @@ -57,7 +57,7 @@ Outputs the live policy. Compare against `.github/branch-protection.json` to det - **Audit trail**: `git log .github/branch-protection.json` shows every change with a reviewable diff and a merge commit. - **Disaster recovery**: if branch protection is accidentally removed or weakened via the UI, the JSON is the canonical recovery point. -- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repo policy lives in the repo. +- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repository policy lives in the repository. ## What this gates @@ -69,7 +69,7 @@ After branch protection is applied, every PR targeting `main` must: 4. Have all review conversations resolved. 5. Be squash- or rebase-merged (no merge commits). -Even repo admins are subject to these rules. +Even repository admins are subject to these rules. ## Subsequent hardening (not in this PR) diff --git a/docs/dev/ci.md b/docs/dev/ci.md index d9855b0..010d2df 100644 --- a/docs/dev/ci.md +++ b/docs/dev/ci.md @@ -2,9 +2,9 @@ `.github/workflows/`: -- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repo PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`). +- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repository PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`). - **AWS feature build job**: `cargo build/test -p omnigraph-server --features aws` on ubuntu-latest. 
-- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_repo_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`. +- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_graph_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`. - **release-edge.yml**: on every push to main, retags `edge`, builds Linux/macOS-Intel/macOS-arm64 archives + sha256, publishes a rolling prerelease. - **release.yml**: on `v*` tags, builds the 3-platform matrix and updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`. - **package.yml**: manual ECR image build; emits two image tags per commit (``, `-aws`) via CodeBuild. diff --git a/docs/dev/codeowners.md b/docs/dev/codeowners.md index ad388ea..edd355d 100644 --- a/docs/dev/codeowners.md +++ b/docs/dev/codeowners.md @@ -2,13 +2,13 @@ `.github/CODEOWNERS` is **generated** — not hand-edited. The source of truth is `.github/codeowners-roles.yml`, expanded by `.github/scripts/render-codeowners.py`. CI rejects drift between the two and rejects direct edits to `CODEOWNERS` that don't accompany a yml change. -This setup gives every role change a reviewable PR and a permanent in-repo audit trail (`git log .github/codeowners-roles.yml`). +This setup gives every role change a reviewable PR and a permanent in-repository audit trail (`git log .github/codeowners-roles.yml`). 
## Current roles | Role | Members | Scope | |---|---|---| -| `engineering` | `@aaltshuler` | All code under `crates/**`, repo infrastructure, default for unmapped paths | +| `engineering` | `@aaltshuler` | All code under `crates/**`, repository infrastructure, default for unmapped paths | | `docs` | `@aaltshuler`, `@ragnorc` | `docs/**`, README.md, AGENTS.md, CLAUDE.md, SECURITY.md | GitHub treats multiple owners in a CODEOWNERS line as **"any one of them satisfies the review requirement"**. For docs, either named member can approve. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured). @@ -34,4 +34,4 @@ CI fails the PR if: - **Audit trail**: `git log .github/codeowners-roles.yml` is the canonical record of every role change. The rendered `CODEOWNERS` is a derived artifact. - **Roles are first-class**: paths reference roles, not raw handles. Renaming a person or rotating a role updates one place, not every path. - **Future extension**: scheduled rotation (weekly on-call, quarterly leads) plugs into the same yml without changing the path mappings. Not enabled today. -- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repo's code-owner policy follows the same "policy as reviewed code" pattern. +- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repository's code-owner policy follows the same "policy as reviewed code" pattern. diff --git a/docs/dev/lance.md b/docs/dev/lance.md index 713a7d7..ef83f2c 100644 --- a/docs/dev/lance.md +++ b/docs/dev/lance.md @@ -1,6 +1,6 @@ # Lance Docs Index (for OmniGraph agents) -OmniGraph sits on top of Lance. Many problems — index lifecycle, branching, transactions, fragments, compaction, vector/FTS internals — are answered upstream in Lance's docs, not in this repo. +OmniGraph sits on top of Lance. 
Many problems — index lifecycle, branching, transactions, fragments, compaction, vector/FTS internals — are answered upstream in Lance's docs, not in this codebase. This file is the curated entry point. **When you hit a Lance-shaped problem, find the matching topic below and fetch the listed URL(s) before guessing.** Don't grep our codebase for behavior that is documented authoritatively in Lance. @@ -156,13 +156,26 @@ If a future need pulls one of these into scope, add a row to the matching domain When Lance ships a major release that changes any of the above (file format bump, new index type, transaction semantics change, new branching primitive), refresh this index in the same change as the omnigraph upgrade. Stale Lance pointers are worse than no pointers. -### Last alignment audit: 2026-05-02 (Lance 4.0.1 upstream; omnigraph pinned at 4.0.0) +### Last alignment audit: 2026-05-22 (Lance 6.0.1 upstream; omnigraph pinned at 6.0.1) -A full read-through of every index page above was performed in the MR-793 cycle. Findings (no code changes required for PR #70): +Migration from Lance 4.0.0 → 6.0.1 landed in this cycle (DataFusion 52 → 53, Arrow 57 → 58, lance-tokenizer 6.0.1 added, tantivy* removed). Direct 4 → 6 jump; v5.x was not used as an intermediate (rationale in `~/.claude/plans/shimmering-percolating-duckling.md`). Behavior-affecting findings: -- The MemWAL "three sub-pages" (Overview / Details / Implementation) turned out to be **anchor sections on the single existing page** at `https://lance.org/format/table/mem_wal/` — not separate URLs. Findings: MemWAL is opt-in (requires an unenforced primary key + explicit shard config; omnigraph doesn't use it), operates intra-table (LSM-tree for streaming writes into one Lance table), and does NOT overlap with MR-847's cross-table manifest-vs-Lance-HEAD recovery problem. MR-847's design is unaffected. 
-- The distributed-indexing guide names Python APIs (`commit_existing_index_segments`, `merge_existing_index_segments`); the Rust analogues exist via `CreateIndexBuilder::execute_uncommitted` for scalar indices but **`build_index_metadata_from_segments` is `pub(crate)`** and blocks vector-index two-phase commits from outside the lance crate. Filed [lance-format/lance#6666](https://github.com/lance-format/lance/issues/6666) as a companion to [#6658](https://github.com/lance-format/lance/issues/6658). -- "Stable Row ID for Index" is documented as **experimental** in lance-4.0.x. Our datasets enable stable row IDs at the dataset level (`WriteParams::enable_stable_row_ids = true`); confirming whether our created indices opt into stable-row-id mode is a follow-up worth doing before MR-848 (index reconciler) lands. -- Fragment Reuse Index (FRI) is documented as one of three compaction strategies. omnigraph currently uses option 2 (immediate index rewrite at compaction time, via `omnigraph optimize`'s post-compaction rebuild). Adopting FRI is the explicit option for compaction-friendly index updates; relevant to MR-848. +- **DatasetIndexExt moved** from `lance-index` to `lance::index` (Lance PR #6280, v5.0). Six import sites updated. `lance-index::IndexType` and `lance-index::is_system_index` stayed in `lance-index`. `omnigraph-cli` and `omnigraph-server` gained `lance = { workspace = true }` in their dev-dependencies. +- **`DescribeTableResponse` gained `is_only_declared: Option`** (lance-namespace 6.0+, v5.0 PR #6186). Set to `Some(false)` in both `BranchManifestNamespace::describe_table` and `StagedTableNamespace::describe_table` — every table we return is physically materialized via `Dataset::open`, never "declared-only." +- **`MergeInsertBuilder` execute_reader return shape preserved** `(Arc, MergeStats)`; the publisher CAS chain at `db/manifest/publisher.rs:370-391` works unchanged. 
Pinned by `tests/lance_surface_guards.rs::_compile_merge_insert_builder_method_chain`. +- **`LanceError::TooMuchWriteContention` variant retained** in v6.0.1 (no rename). The typed publisher translation at `db/manifest/publisher.rs:417-430` continues to apply. Pinned by `lance_surface_guards.rs::lance_error_too_much_write_contention_variant_exists`. +- **`ManifestLocation` field shape stable**: `.path: object_store::path::Path`, `.size: Option<u64>`, `.e_tag: Option<String>`, `.naming_scheme: ManifestNamingScheme`. Pinned by `lance_surface_guards.rs::manifest_location_field_shape`. +- **`LanceFileVersion::default()` flipped V2_0 → V2_1** (v5.0). No effect on creates/appends — every `data_storage_version` callsite explicitly pins `Some(LanceFileVersion::V2_2)` (load-bearing for blob v2: `Blob v2 requires file version >= 2.2` enforced in `lance/src/dataset/write.rs:748`); compaction is the one path that picks up the new default (see the `compact_files` entry below). +- **`Dataset::checkout_version(N).await?.restore().await?`**: `restore()` takes `&mut self` and returns `Result<()>` (mutates in place, does not consume + return a new dataset). The recovery rollback hammer at `db/manifest/recovery.rs:505-522` continues to work. Pinned by `lance_surface_guards.rs::_compile_checkout_version_then_restore_signature`. +- **`DatasetBuilder::from_namespace(...).with_branch(...).with_version(...).load()`** surface preserved (the namespace builder chain at `db/manifest/namespace.rs:162-174`). Pinned by `lance_surface_guards.rs::_compile_dataset_builder_from_namespace_signature`. +- **`compact_files(&mut ds, CompactionOptions::default(), None)`** signature stable. `CompactionOptions` still does not expose `data_storage_version`; `compact_files` builds its own `WriteParams { ..Default::default() }`. Note: `LanceFileVersion::default()` is now V2_1 in v6, so optimize-rewritten fragments come out at V2_1 by default (was V2_0 in v4). Existing explicit V2_2 pins on creates/appends still apply.
+- **`Dataset::delete(predicate)` returns `DeleteResult { new_dataset: Arc<Dataset>, num_deleted_rows: u64 }`** — unchanged shape. Pinned by `lance_surface_guards.rs::_compile_delete_result_field_shape`. MR-A will repurpose this guard to the staged two-phase variant once the `DeleteBuilder::execute_uncommitted` migration lands. +- **File reader read methods now async** (Lance PR #6710, v6.0). No effect — omnigraph reaches Lance exclusively through `Dataset::scan` and the staged-write API. +- **Tokenizer vendored as `lance-tokenizer`** (Lance PR #6512, v6.0). No effect — no direct tokenizer imports. +- **Lance #6658 closed** (2026-05-14) but `DeleteBuilder::execute_uncommitted` did **not** ship in v6.0.1 — binary search across the release stream shows it first appears in `v7.0.0-beta.10` (the closing commits landed on main but were not backported to the 6.x line). Tracked as MR-A: migrate `delete_where` to staged, retire the parse-time D2 mutation rule, extend recovery sidecar coverage. **Gated on the Lance v7.x bump**, not this PR. v7.0.0-rc.1 was released on 2026-05-21. +- **Lance #6666 still open** (`build_index_metadata_from_segments` public): vector-index two-phase blocked; inline `create_vector_index` residual retained. +- **Lance #6877 still open** (`MergeInsertBuilder` dup-rowid): PR #109's `SourceDedupeBehavior::FirstSeen` + `check_batch_unique_by_keys` precondition stay load-bearing. + +Surface guards added: `crates/omnigraph/tests/lance_surface_guards.rs` (8 named guards; 3 runtime + 5 compile-only). Future Lance bumps re-run this file first as the smoke check. Two additional guards from the original plan are deferred to a follow-up (`manifest_cas_returns_row_level_contention_variant` needs full publisher-race harness; `table_version_metadata_byte_compatible_with_v4` needs `pub(crate)` reach extension). Bump this date stanza on the next alignment pass.
diff --git a/docs/dev/testing.md b/docs/dev/testing.md index 14b66ed..e6989ba 100644 --- a/docs/dev/testing.md +++ b/docs/dev/testing.md @@ -22,7 +22,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav | `merge_truth_table.rs` | Merge-pair truth table (MR-786): all 9×9 `(left_op, right_op)` cells from `{noop, addNode, removeNode, addEdge, removeEdge, setProperty, dropProperty, addLabel, removeLabel}`. Adding a new op to `OpVariant` forces a compile error in `build_case` until the new row + column are dispositioned. 36 executable cells run through real `branch_merge` with a structured oracle (`MergeOutcome` / `MergeConflictKind` + graph-state assert); 45 cells involving `dropProperty`/`addLabel`/`removeLabel` are recorded as `Unsupported` until the mutation grammar grows. | | `runs.rs` | Direct-publish writes: cancellation, concurrent-writer CAS, multi-statement atomicity, MR-794 staged-write rewire (D₂ rejection, insert+update coalesce, multi-append coalesce, partial-failure recovery, load RI/cardinality recovery) | | `staged_writes.rs` | TableStore staged-write primitives (`stage_append`, `stage_merge_insert`, `commit_staged`, `scan_with_staged`, `count_rows_with_staged`) — primitive-level only; engine code uses the in-memory `MutationStaging` accumulator instead | -| `lifecycle.rs` | Repo lifecycle, schema state | +| `lifecycle.rs` | Graph lifecycle, schema state | | `point_in_time.rs` | Snapshots, time travel (`snapshot_at_version`, `entity_at`) | | `changes.rs` | `diff_between` / `diff_commits` | | `consistency.rs` | Cross-table snapshot isolation, atomic publish | @@ -31,7 +31,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav | `traversal.rs` | `Expand`, variable-length hops, anti-join | | `aggregation.rs` | `count`, `sum`, `avg`, `min`, `max` | | `export.rs` | NDJSON streaming export filters | -| `s3_storage.rs` | S3-backed repo (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) | +| 
`s3_storage.rs` | S3-backed graph (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) | | `lance_version_columns.rs` | Per-row `_row_last_updated_at_version` behavior | | `validators.rs` | Schema constraint enforcement (enum, range, unique, cardinality) across JSONL, insert, update paths | | `maintenance.rs` | `optimize` (compaction) + `cleanup` (version GC): empty/idempotent/no-op edges, policy validation, head preservation | @@ -45,7 +45,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav ## Test helpers -- **Engine** — `crates/omnigraph/tests/helpers/mod.rs`: `init_and_load()` (bootstrap a temp repo + load standard fixture), `snapshot_main()`, `snapshot_branch()`, query/mutation runners, row collection and counting. Use these instead of hand-rolling. +- **Engine** — `crates/omnigraph/tests/helpers/mod.rs`: `init_and_load()` (bootstrap a temp graph + load standard fixture), `snapshot_main()`, `snapshot_branch()`, query/mutation runners, row collection and counting. Use these instead of hand-rolling. - **CLI** — `crates/omnigraph-cli/tests/support/mod.rs`: `Command`-style wrapper for invoking `omnigraph`, server-process spawning, fixture resolution, output assertion helpers. - **Server** — no shared helpers; server tests call the `Omnigraph` engine API directly and exercise endpoints over the wire. 
@@ -63,14 +63,14 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav CI runs three S3-backed tests against a containerized RustFS server (`.github/workflows/ci.yml` → `rustfs_integration` job): - `cargo test -p omnigraph-engine --test s3_storage` -- `cargo test -p omnigraph-server --test server server_opens_s3_repo_directly_and_serves_snapshot_and_read` +- `cargo test -p omnigraph-server --test server server_opens_s3_graph_directly_and_serves_snapshot_and_read` - `cargo test -p omnigraph-cli --test system_local local_cli_s3_end_to_end_init_load_read_flow` Locally, set `OMNIGRAPH_S3_TEST_BUCKET` (and the usual `AWS_*` vars including `AWS_ENDPOINT_URL_S3` for non-AWS) before running. Without those, S3 tests skip gracefully. ## OpenAPI drift -`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json` and diffs against the checked-in copy. CI auto-commits the regeneration on same-repo PRs and otherwise runs in strict-check mode (env: `OMNIGRAPH_UPDATE_OPENAPI`). +`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json` and diffs against the checked-in copy. CI auto-commits the regeneration on same-repository PRs and otherwise runs in strict-check mode (env: `OMNIGRAPH_UPDATE_OPENAPI`). ## Examples & benches @@ -79,7 +79,7 @@ Locally, set `OMNIGRAPH_S3_TEST_BUCKET` (and the usual `AWS_*` vars including `A ## Coverage tooling — what's missing -There is **no** coverage tooling in the repo today: no `tarpaulin.toml`, no `codecov.yml`, no coverage CI step. If you want to know whether your change is covered, the answer comes from reading and running the relevant integration tests, not from a tool. +There is **no** coverage tooling in the repository today: no `tarpaulin.toml`, no `codecov.yml`, no coverage CI step. If you want to know whether your change is covered, the answer comes from reading and running the relevant integration tests, not from a tool. 
If introducing coverage tooling is in scope for your task, the natural first step is `cargo-llvm-cov` wired into a separate CI job, and a per-crate threshold rather than a global one. @@ -97,7 +97,7 @@ How to check: - *Existing test covers the area but not your case* → **add an assertion or a fixture row to the existing test**, don't write a new function with `init_and_load()` again. - *No existing coverage in any test file* → only then write a new test; put it in the file that owns the area, or open a new file only if the area itself is new. -Three duplicated `init_and_load() → run_query → assert_eq` blocks where one parameterized test would do is the most common form of test rot in this repo. Don't add to it. +Three duplicated `init_and_load() → run_query → assert_eq` blocks where one parameterized test would do is the most common form of test rot in this repository. Don't add to it. ## Before-every-task checklist @@ -106,7 +106,7 @@ When you pick up any change, walk through this: 1. **Find existing coverage** (per the principle above). Don't just look at the first test file by name — grep for the symbol you're touching across every crate's `tests/`. 2. **Run those tests locally before editing.** `cargo test --workspace --locked` for the broad pass; `-p --test ` for a focused loop. Confirm a clean baseline. 3. **Decide extend-vs-new** explicitly. If you can extend an existing test (assertion, fixture row, parameterization), do that. Only add a new test fn or new file if no existing one owns the area. -4. **Reuse the helpers.** `init_and_load()`, fixture files, the CLI `support` harness — re-use them. Don't bootstrap a fresh repo by hand if a helper exists. +4. **Reuse the helpers.** `init_and_load()`, fixture files, the CLI `support` harness — re-use them. Don't bootstrap a fresh graph by hand if a helper exists. 5. 
**Mind the boundary.** Per [docs/dev/invariants.md](invariants.md), test at the layer the change lives at — planner-level changes deserve planner-level tests, not just end-to-end. 6. **For substrate-touching changes** (Lance behavior), reach for `failpoints` or fixture-driven scenarios, not stubbed-out mocks. 7. **For server / API changes**, confirm the OpenAPI regeneration happens in `openapi.rs` and that the diff lands in `openapi.json`. diff --git a/docs/releases/v0.5.0.md b/docs/releases/v0.5.0.md new file mode 100644 index 0000000..16e284e --- /dev/null +++ b/docs/releases/v0.5.0.md @@ -0,0 +1,171 @@ +# Omnigraph v0.5.0 + +Omnigraph v0.5.0 is a substrate, security, and migration-safety release. It +jumps the storage substrate from Lance 4 to Lance 6.0.1 (DataFusion 52 → 53, +Arrow 57 → 58), introduces engine-wide Cedar policy enforcement on every +authoring path, and ships a structured schema-lint v1 chassis with +code-tagged diagnostics, soft drops, and an explicit `--allow-data-loss` +flag for destructive migrations. + +## Highlights + +- **Lance 6.0.1 substrate**: bump from Lance 4.0.0 → 6.0.1, DataFusion 52 → + 53, Arrow 57 → 58. New optimizer rules (vectorized `IN`-list eq kernel, + `PhysicalExprSimplifier`, push-limit-into-hash-join, CASE-NULL shortcut) + reach predicates that flow through the engine. `lance-tokenizer` replaces + tantivy internally; FTS behavior preserved. +- **Cedar policy engine**: a new `omnigraph-policy` crate wires + `Omnigraph::enforce(action, scope, actor)` into every `_as` writer + (`mutate_as`, `load_as`, `apply_schema_as`, `branch_create_as`, + `branch_merge_as`, `branch_delete_as`, plus the load and change + variants). The HTTP server defaults to deny-all when no Cedar policy is + configured; a YAML policy file is required to enable writes. Actor + identity comes only from signed token claims — clients cannot set actor + identity directly. 
+- **Schema lint v1 chassis**: diagnostics now carry stable codes of the form + `OG-XXX-NNN` instead of free-form messages. `omnigraph schema plan` and + `apply` understand soft drops on properties and types — destructive drops + require the new `--allow-data-loss` flag (Hard mode) at the CLI and an + equivalent JSON flag over HTTP. +- **Structured filter pushdown**: query-language predicates lower to + DataFusion `Expr` and push down through Lance's `Scanner::filter_expr` + instead of being flattened to SQL strings. This unlocks `CompOp::Contains` + pushdown (via `array_has`), which previously fell through to in-memory + post-scan filtering, and lets the DataFusion 53 optimizer rules above act + on our predicates. +- **HTTP `allow_data_loss` parity**: the destructive-drop guard now exists + on both the CLI (`--allow-data-loss`) and HTTP (`allow_data_loss: true` in + the schema-apply request body). +- **Inline query strings on CLI and HTTP**: `omnigraph read` / + `omnigraph mutate` and the corresponding HTTP endpoints accept inline + `.gq` source, not just a file path. Easier ad-hoc queries, clearer + request logs. +- **Browser CORS layer**: optional CORS layer on `omnigraph-server` for + browser-based UIs, gated by `OMNIGRAPH_CORS_ORIGINS`. +- **Merge-insert dup-rowid fix**: Lance's `MergeInsertBuilder` could surface + spurious `"Ambiguous merge inserts"` errors on sequential merges against + rows previously rewritten by `merge_insert`. The engine now opts into + `SourceDedupeBehavior::FirstSeen` with a `check_batch_unique_by_keys` + fail-fast precondition that guarantees source-side dedup happens before + Lance sees the batch. +- **Branch-merge error-path recovery**: a branch merge that failed + mid-flight could leave the in-process coordinator pointing at a stale + active branch. The error path now restores the prior coordinator, + matching the success path's invariant. 
+- **Branch merge with blob columns**: external blob URIs are now + materialized correctly during branch merge instead of being dropped or + pointing at the source branch. +- **Lance API surface guards**: a new test file + (`crates/omnigraph/tests/lance_surface_guards.rs`) pins eight specific + Lance API surfaces (`LanceError::TooMuchWriteContention`, + `ManifestLocation` fields, `MergeInsertBuilder` return shape, + `WriteParams::default`, `compact_files` signature, etc.) so the next + Lance bump fails at compile time or at runtime on any silent drift rather than + producing wrong-state recovery in production. + +## Behavior changes + +- **On-disk format unchanged**: existing v0.4.2 datasets open unchanged. + The Lance file format pin stays at V2_2 (required by Lance's blob v2 + feature). +- **`omnigraph-server` defaults to deny-all under `--policy`**: starting a + server with the policy feature enabled but no Cedar YAML policy + configured rejects every write. Operators must supply a policy file to + authorize anything. +- **Schema-lint diagnostics carry stable codes**: messages now lead with + `OG-XXX-NNN`. CI parsers or tooling that keyed off the v0.4.2 free-form + text need to switch to code-based matching. +- **Destructive schema drops require `--allow-data-loss`**: dropping a + property or type returns a structured diagnostic by default. + `omnigraph schema apply --allow-data-loss` (CLI) or + `{"allow_data_loss": true}` (HTTP) opts into Hard mode. +- **`HashJoinExec` null-aware semantics on anti-join**: a side effect of + the DataFusion 53 bump — `NOT IN` semantics under null-valued anti-join + columns are now correct per the SQL standard. Queries that depended on the + prior behavior were returning incorrect results. + +## Upgrade Notes + +### Migration + +- No data migration. v0.4.2 graphs open directly on v0.5.0. + +### Clients + +- HTTP and SDK clients should switch any string-matching schema-lint + parsing to code-based matching against the `OG-XXX-NNN` prefix.
+- Clients exercising destructive schema drops (`DropProperty`, `DropType`) + must add the `allow_data_loss` request field (HTTP) or + `--allow-data-loss` flag (CLI). Default is soft-drop-or-reject. +- Clients consuming `mutate_as` / `load_as` / `apply_schema_as` / branch + authoring APIs now flow through the policy enforcer. Anything bypassing + authorization on v0.4.2 will be rejected on v0.5.0 once a policy is + configured. + +### Operators + +- Configure a Cedar policy YAML for production servers before enabling + writes; deny-all is the new default. The `omnigraph policy validate` / + `test` / `explain` CLI commands are unchanged. +- Bearer tokens continue to be the actor-identity source; review the + signed-token-claim-only invariant in `docs/dev/invariants.md` if you've + built custom authentication. +- If your local CI uses RustFS for S3-compatible storage testing, our CI + pins `rustfs/rustfs:1.0.0-beta.3` (the last known-good tag before the + upstream credentials-policy change). Mirror the pin or set + `RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true` for the new image + versions. + +## Tests added or strengthened + +- `crates/omnigraph/tests/lance_surface_guards.rs` — 8 named guards pinning + Lance API surfaces against silent drift on future bumps. +- `crates/omnigraph/tests/policy_engine_chassis.rs` — engine-level policy + enforcement coverage; complements the existing HTTP policy tests. +- Policy chassis e2e gap-fills — branch-merge, branch-create, branch-delete + policy paths now have explicit end-to-end tests over HTTP and CLI. +- Merge-pair truth table — exhaustive op-variant matrix for three-way + merge across `noop`, `addNode`, `removeNode`, `addEdge`, `removeEdge`, + `setProperty`, `dropProperty`, `addLabel`, `removeLabel`; the build + fails to compile when a new op variant is added without dispositioning + every pairing. 
+- Merge-insert: regression for the dup-rowid bug class on the load surface + (`load_merge_repeated_against_overlapping_keys_succeeds`), the update + surface (`second_sequential_update_on_same_row_succeeds`), and the + upstream-Lance-gap canary + (`load_merge_window_2_documents_upstream_lance_gap`). +- Maintenance + destructive-migration coverage — `omnigraph optimize` / + `cleanup` boundary cases, plus schema-apply soft-drop and Hard-mode + paths. +- Stable-row-id preservation across `stage_overwrite` — pins the invariant + that staged overwrites carry stable row IDs through to the committed + fragment set. +- `CompOp::Contains` pushdown regression + (`ir_filter_with_list_contains_pushes_down`) — pins the new structured + Expr pushdown path that retired the in-memory fallback. + +## Included Changes + +- Lance 4 → 6.0.1, DataFusion 52 → 53, Arrow 57 → 58 substrate upgrade. +- `omnigraph-policy` crate with engine-wide Cedar enforcement and + signed-token-claim-only actor identity. +- Schema-lint v1 chassis with `OG-XXX-NNN` codes, soft `DropProperty` / + `DropType` semantics, and `--allow-data-loss` for Hard mode. +- HTTP `allow_data_loss` request field parity with the CLI flag. +- Structured DataFusion `Expr` filter pushdown via + `Scanner::filter_expr`, with `CompOp::Contains` lowered through + `array_has`. +- Inline `.gq` source acceptance on CLI and HTTP read/mutate endpoints. +- Optional CORS layer on `omnigraph-server` for browser UIs. +- Bug fixes: merge-insert dup-rowid (FirstSeen + uniqueness precondition), + branch-merge coordinator restore on error, blob-column materialization + during branch merge. +- New Lance API surface-guard test file as the canary for future Lance + bumps. +- Recovery-sidecar coverage extended across the four write paths + (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, + `ensure_indices`) with failpoint regression tests. 
+- CI: pinned `rustfs/rustfs:1.0.0-beta.3` after the upstream `:latest` + introduced a credentials-policy change. +- Version bump to `0.5.0` across workspace crates, `Cargo.lock`, + `openapi.json`, and the `AGENTS.md` surveyed version. diff --git a/docs/releases/v0.6.0.md b/docs/releases/v0.6.0.md new file mode 100644 index 0000000..80cda02 --- /dev/null +++ b/docs/releases/v0.6.0.md @@ -0,0 +1,19 @@ +# Omnigraph v0.6.0 + +## Breaking Changes + +- Renamed the Cedar resource entity from `Omnigraph::Repo` to `Omnigraph::Graph`. +- Renamed policy API terminology from `repo_id` to `graph_id` on `PolicyCompiler::compile` and `PolicyEngine::load`. +- Renamed query-lint schema source JSON from `"repo"` to `"graph"` for `schema_source.kind`. + +## User Impact + +- No on-disk migration is required. Existing `.omni` graphs continue to open with the same storage layout. +- Supported YAML policy authoring is unchanged because the YAML schema does not expose the Cedar entity type name. +- Operators with unsupported raw Cedar policy files should update `Omnigraph::Repo` + resource references to `Omnigraph::Graph`. + +## Documentation + +- Public docs, CLI help, examples, server docs, and test helpers now consistently use "graph" for the OmniGraph data artifact. +- GitHub/source repository terminology remains spelled out as "repository" where needed. diff --git a/docs/user/audit.md b/docs/user/audit.md index 80ac137..e8abe5b 100644 --- a/docs/user/audit.md +++ b/docs/user/audit.md @@ -4,4 +4,4 @@ - `_as` variants of every write API let callers override the actor: `mutate_as`, `ingest_as`, `branch_merge_as`, `apply_schema_as`, etc. - Actor IDs are persisted on `GraphCommit.actor_id` with split storage in `_graph_commit_actors.lance` (the commit graph is split into `_graph_commits.lance` for the linkage and `_graph_commit_actors.lance` for the actor map). - HTTP server uses the bearer-token actor automatically; CLI uses the local user / explicit env (no implicit actor). 
-- Pre-v0.4.0 repos also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. The Run state machine was removed in MR-771; those files are inert post-v0.4.0 and reclaimed by MR-770's production sweep. +- Pre-v0.4.0 graphs also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. The Run state machine was removed in MR-771; those files are inert post-v0.4.0 and reclaimed by MR-770's production sweep. diff --git a/docs/user/cli-reference.md b/docs/user/cli-reference.md index a4e3dad..0326e64 100644 --- a/docs/user/cli-reference.md +++ b/docs/user/cli-reference.md @@ -8,7 +8,7 @@ A reference for the `omnigraph` binary's command surface and `omnigraph.yaml` sc | Command | Purpose | |---|---| -| `init` | `--schema ` → initialize a repo (also scaffolds `omnigraph.yaml` if missing) | +| `init` | `--schema ` → initialize a graph (also scaffolds `omnigraph.yaml` if missing) | | `load` | bulk load a branch (`--mode overwrite\|append\|merge`) | | `ingest` | branch-creating transactional load (`--from `) | | `query` (alias: `read`) | run named read query; source via `--query `, `-e`/`--query-string `, or `--alias ` (exactly one). `read` is the deprecated previous name and prints a one-line warning to stderr | @@ -19,7 +19,7 @@ A reference for the `omnigraph` binary's command surface and `omnigraph.yaml` sc | `commit list \| show` | inspect commit graph | | `run list \| show \| publish \| abort` | transactional run ops | | `schema plan \| apply \| show (alias: get)` | migrations | -| `lint` (alias: `check`) | offline / repo-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` | +| `lint` (alias: `check`) | offline / graph-backed query validation. 
Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` | | `optimize` | non-destructive Lance compaction | | `cleanup --keep N --older-than 7d --confirm` | destructive version GC | | `embed` | offline JSONL embedding pipeline | diff --git a/docs/user/cli.md b/docs/user/cli.md index d680cf6..70b5d83 100644 --- a/docs/user/cli.md +++ b/docs/user/cli.md @@ -1,13 +1,13 @@ # CLI Guide -## Core Repo Flow +## Core Graph Flow ```bash -omnigraph init --schema ./schema.pg ./repo.omni -omnigraph load --data ./data.jsonl --mode overwrite ./repo.omni -omnigraph snapshot ./repo.omni --branch main --json -omnigraph query --uri ./repo.omni --query ./queries.gq --name get_person --params '{"name":"Alice"}' -omnigraph mutate --uri ./repo.omni --query ./queries.gq --name insert_person --params '{"name":"Mina","age":28}' +omnigraph init --schema ./schema.pg ./graph.omni +omnigraph load --data ./data.jsonl --mode overwrite ./graph.omni +omnigraph snapshot ./graph.omni --branch main --json +omnigraph query --uri ./graph.omni --query ./queries.gq --name get_person --params '{"name":"Alice"}' +omnigraph mutate --uri ./graph.omni --query ./queries.gq --name insert_person --params '{"name":"Mina","age":28}' ``` `omnigraph query` is the canonical read command (pairs with `POST /query`); @@ -21,11 +21,11 @@ For ad-hoc reads and mutations (REPLs, AI agents, one-off scripts), pass the GQ source inline with `-e` / `--query-string` instead of a file path: ```bash -omnigraph query --uri ./repo.omni \ +omnigraph query --uri ./graph.omni \ -e 'query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }' \ --params '{"name":"Alice"}' -omnigraph mutate --uri ./repo.omni \ +omnigraph mutate --uri ./graph.omni \ -e 'query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }' \ --params '{"name":"Inline","age":42}' ``` @@ -38,22 +38,22 @@ only the source 
loader changes. ## Branching And Reviewable Data Flows ```bash -omnigraph branch create --uri ./repo.omni --from main feature-x -omnigraph branch list --uri ./repo.omni -omnigraph branch merge --uri ./repo.omni feature-x --into main +omnigraph branch create --uri ./graph.omni --from main feature-x +omnigraph branch list --uri ./graph.omni +omnigraph branch merge --uri ./graph.omni feature-x --into main -omnigraph ingest --data ./batch.jsonl --branch review/import-2026-04-09 ./repo.omni -omnigraph export ./repo.omni --branch main --type Person > people.jsonl -omnigraph commit list ./repo.omni --branch main --json -omnigraph commit show --uri ./repo.omni --json +omnigraph ingest --data ./batch.jsonl --branch review/import-2026-04-09 ./graph.omni +omnigraph export ./graph.omni --branch main --type Person > people.jsonl +omnigraph commit list ./graph.omni --branch main --json +omnigraph commit show --uri ./graph.omni --json ``` ## Remote Server Mode -Serve a repo: +Serve a graph: ```bash -omnigraph-server ./repo.omni --bind 127.0.0.1:8080 +omnigraph-server ./graph.omni --bind 127.0.0.1:8080 ``` Read through the HTTP API: @@ -73,22 +73,22 @@ and configure the matching `bearer_token_env` in `omnigraph.yaml`. 
```bash omnigraph lint --query ./queries.gq --schema ./schema.pg --json -omnigraph check --query ./queries.gq ./repo.omni --json +omnigraph check --query ./queries.gq ./graph.omni --json -omnigraph schema plan --schema ./next.pg ./repo.omni --json -omnigraph schema apply --schema ./next.pg ./repo.omni --json +omnigraph schema plan --schema ./next.pg ./graph.omni --json +omnigraph schema apply --schema ./next.pg ./graph.omni --json omnigraph policy validate --config ./omnigraph.yaml omnigraph policy test --config ./omnigraph.yaml omnigraph policy explain --config ./omnigraph.yaml --actor act-alice --action read --branch main -omnigraph commit list ./repo.omni --json -omnigraph commit show --uri ./repo.omni --json +omnigraph commit list ./graph.omni --json +omnigraph commit show --uri ./graph.omni --json ``` (The legacy `omnigraph run list/show/publish/abort` subcommands were removed in MR-771; mutations and loads publish atomically and the commit graph (`omnigraph commit list`) is the audit surface.) -`query lint` and `query check` are the same command surface. In v1, repo-backed -lint uses local or `s3://` repo URIs; HTTP targets are only supported when you +`query lint` and `query check` are the same command surface. In v1, graph-backed +lint uses local or `s3://` graph URIs; HTTP targets are only supported when you also pass `--schema`. ## Config diff --git a/docs/user/deployment.md b/docs/user/deployment.md index e611245..7857077 100644 --- a/docs/user/deployment.md +++ b/docs/user/deployment.md @@ -8,8 +8,8 @@ internal deploy automation. Omnigraph supports two broad deployment shapes: -- local directory repos -- `s3://` repos on AWS S3 or S3-compatible object stores +- local directory graphs +- `s3://` graphs on AWS S3 or S3-compatible object stores The server binary and container image expose the same HTTP surface. 
@@ -20,18 +20,18 @@ Build or install: - `omnigraph` - `omnigraph-server` -Run against a local repo: +Run against a local graph: ```bash -omnigraph-server ./repo.omni --bind 0.0.0.0:8080 +omnigraph-server ./graph.omni --bind 0.0.0.0:8080 ``` -Run against an object-store-backed repo: +Run against an object-store-backed graph: ```bash OMNIGRAPH_SERVER_BEARER_TOKEN="change-me" \ AWS_REGION="us-east-1" \ -omnigraph-server s3://my-bucket/repos/example/releases/2026-04-10-v0.1.0 \ +omnigraph-server s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0 \ --bind 0.0.0.0:8080 ``` @@ -46,7 +46,7 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/ The bootstrap: - starts a local RustFS-backed object store -- creates a bucket and S3-backed Omnigraph repo +- creates a bucket and S3-backed Omnigraph graph - loads the checked-in context fixture - starts `omnigraph-server` on `127.0.0.1:8080` @@ -60,8 +60,8 @@ Useful overrides: - `WORKDIR=/path/to/state` - `BUCKET=omnigraph-local` -- `PREFIX=repos/context` -- `RESET_REPO=1` to delete an existing partially initialized repo prefix before recreating it +- `PREFIX=graphs/context` +- `RESET_REPO=1` to delete an existing partially initialized graph prefix before recreating it - `BIND=127.0.0.1:8080` - `RUSTFS_CONTAINER_NAME=omnigraph-rustfs-demo` @@ -76,7 +76,7 @@ If `aws` is not installed, the script attempts a user-local AWS CLI install via running. If a previous bootstrap left objects behind under the selected `PREFIX` but did -not finish initializing the repo, rerun with `RESET_REPO=1` or choose a new +not finish initializing the graph, rerun with `RESET_REPO=1` or choose a new `PREFIX`. ## Container Deployment @@ -87,23 +87,23 @@ Build the image: docker build -t omnigraph-server:local . 
``` -Run against a local repo: +Run against a local graph: ```bash docker run --rm -p 8080:8080 \ - -v "$PWD/repo.omni:/data/repo.omni" \ + -v "$PWD/graph.omni:/data/graph.omni" \ omnigraph-server:local \ - /data/repo.omni --bind 0.0.0.0:8080 + /data/graph.omni --bind 0.0.0.0:8080 ``` -Run against an S3-backed repo: +Run against an S3-backed graph: ```bash docker run --rm -p 8080:8080 \ -e OMNIGRAPH_SERVER_BEARER_TOKEN="change-me" \ -e AWS_REGION="us-east-1" \ omnigraph-server:local \ - s3://my-bucket/repos/example/releases/2026-04-10-v0.1.0 \ + s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0 \ --bind 0.0.0.0:8080 ``` @@ -154,7 +154,7 @@ Manager secret whose `SecretString` is a JSON object of `{"actor_id": "token", ...}`: ```bash -omnigraph-server-aws s3://my-bucket/repos/example ... +omnigraph-server-aws s3://my-bucket/graphs/example ... # Environment: # OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET=arn:aws:secretsmanager:us-east-1:123456789012:secret:omnigraph-tokens-AbCdEf ``` diff --git a/docs/user/embeddings.md b/docs/user/embeddings.md index 596a6a0..382e683 100644 --- a/docs/user/embeddings.md +++ b/docs/user/embeddings.md @@ -22,7 +22,7 @@ Mark a Vector property with `@embed("source_text_property")`. At ingest, the eng ## CLI `omnigraph embed` (offline file pipeline) -Operates on **JSONL files** (not on a repo). Three modes (mutually exclusive): +Operates on **JSONL files** (not on a graph). Three modes (mutually exclusive): - (default) `fill_missing` — only embed rows whose target field is empty - `--reembed-all` — overwrite all diff --git a/docs/user/index.md b/docs/user/index.md index 45d8f01..1b93efa 100644 --- a/docs/user/index.md +++ b/docs/user/index.md @@ -18,11 +18,11 @@ of MRs, internal recovery mechanics, or contributor-only invariants. 
| Write queries and mutations | [query-language.md](query-language.md) | | Use embeddings | [embeddings.md](embeddings.md) | -## Operate A Repo +## Operate A Graph | Goal | Read | |---|---| -| Understand repo layout and URI support | [storage.md](storage.md) | +| Understand graph layout and URI support | [storage.md](storage.md) | | Work with branches, commits, and snapshots | [branches-commits.md](branches-commits.md) | | Coordinate multi-query workflows | [transactions.md](transactions.md) | | Read diffs and change feeds | [changes.md](changes.md) | diff --git a/docs/user/server.md b/docs/user/server.md index e0db78f..633ac70 100644 --- a/docs/user/server.md +++ b/docs/user/server.md @@ -1,6 +1,6 @@ # HTTP Server (`omnigraph-server`) -Axum 0.8 + tokio + utoipa-generated OpenAPI. Single repo per process; deploy multiple processes for multi-tenant. +Axum 0.8 + tokio + utoipa-generated OpenAPI. Single graph per process; deploy multiple processes for multi-tenant. ## Endpoint inventory @@ -136,7 +136,7 @@ See [deployment.md](deployment.md) for token-source operational details. - `tower_http::TraceLayer::new_for_http()` - Policy decisions logged at INFO level with actor, action, branch, decision, matched rule -- Startup logs: token source name, repo URI, bind address +- Startup logs: token source name, graph URI, bind address - Graceful SIGINT shutdown ## Not implemented (by design or "TBD") @@ -148,4 +148,4 @@ See [deployment.md](deployment.md) for token-source operational details. admission control" above). No global rate limiter is configured; add `tower_http::limit` if a graph-wide cap is needed. - Pagination — none (commits/branches return everything; export streams). -- Multi-tenant routing — one repo per process. +- Multi-tenant routing — one graph per process. 
diff --git a/docs/user/storage.md b/docs/user/storage.md index b284bc2..c22d4d6 100644 --- a/docs/user/storage.md +++ b/docs/user/storage.md @@ -7,7 +7,7 @@ Every node type and every edge type is its own Lance dataset: - **Columnar Arrow storage**: each property is a column; nullable per Arrow schema. - **Fragments**: data is partitioned into fragments; new writes create new fragments. - **Manifest versioning**: every commit produces a new dataset version; old versions remain readable. -- **Stable row IDs**: `enable_stable_row_ids: true` is set on every Lance dataset OmniGraph creates — node and edge data tables, `__manifest`, `_graph_commits.lance`, `_graph_commit_recoveries.lance`, and any future system tables. This is an architectural invariant: the flag is one-way at dataset create per Lance's row-id-lineage spec, so a future change that introduces a Lance dataset must preserve it. Consequences: `_row_created_at_version` and `_row_last_updated_at_version` are available on every dataset (load-bearing for change-feed validators); `CreateIndex × Rewrite` is not a retryable conflict, so indices survive `omnigraph optimize` without needing the Fragment Reuse Index; readers must use a Lance build that recognises the flag (our pinned 4.0.0 is fine). Pre-0.4.x repos created before this code path settled may have datasets without the flag and cannot be retrofitted in place — the supported path is dump-and-reload. The `stage_overwrite` rewrite path (used by `schema_apply`) preserves the flag through `Operation::Overwrite`; pinned by `stage_overwrite_preserves_stable_row_ids` in `crates/omnigraph/tests/staged_writes.rs`. +- **Stable row IDs**: `enable_stable_row_ids: true` is set on every Lance dataset OmniGraph creates — node and edge data tables, `__manifest`, `_graph_commits.lance`, `_graph_commit_recoveries.lance`, and any future system tables. 
This is an architectural invariant: the flag is one-way at dataset create per Lance's row-id-lineage spec, so a future change that introduces a Lance dataset must preserve it. Consequences: `_row_created_at_version` and `_row_last_updated_at_version` are available on every dataset (load-bearing for change-feed validators); `CreateIndex × Rewrite` is not a retryable conflict, so indices survive `omnigraph optimize` without needing the Fragment Reuse Index; readers must use a Lance build that recognises the flag (our pinned 4.0.0 is fine). Pre-0.4.x graphs created before this code path settled may have datasets without the flag and cannot be retrofitted in place — the supported path is dump-and-reload. The `stage_overwrite` rewrite path (used by `schema_apply`) preserves the flag through `Operation::Overwrite`; pinned by `stage_overwrite_preserves_stable_row_ids` in `crates/omnigraph/tests/staged_writes.rs`. - **Append / delete / `merge_insert`**: native Lance write modes. - **Per-dataset branches** (Lance native): copy-on-write at the dataset level. - **Object-store agnostic**: file://, s3://, gs://, az://, http (read-only via Lance) — OmniGraph wires file:// and s3:// (`storage.rs`). 
@@ -22,7 +22,7 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin - `edges/{fnv1a64-hex(edge_type_name)}` — one Lance dataset per edge type - `__manifest/` — the catalog of all sub-tables and their published versions - `_graph_commits.lance` / `_graph_commit_actors.lance` — the commit graph and its actor map - - (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 repos are inert; the run state machine was removed in MR-771 and these files are cleaned up via MR-770's production sweep) + - (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 graphs are inert; the run state machine was removed in MR-771 and these files are cleaned up via MR-770's production sweep) - **Manifest row schema** (`object_id, object_type, location, metadata, base_objects, table_key, table_version, table_branch, row_count`): - `object_type` ∈ `table | table_version | table_tombstone` - `table_key` ∈ `node: | edge:` @@ -36,7 +36,7 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin The on-disk shape of `__manifest` is reconciled with the binary via a single stamp + dispatcher. `INTERNAL_MANIFEST_SCHEMA_VERSION` declares the shape this binary writes; the on-disk stamp `omnigraph:internal_schema_version` lives in the manifest dataset's schema-level metadata (Lance `update_schema_metadata`). -- **`init_manifest_repo`** stamps the current version at creation, so newly initialized repos never need migration. +- **`init_manifest_graph`** stamps the current version at creation, so newly initialized graphs never need migration. - **Publisher open-for-write path** (`load_publish_state`) calls `migrate_internal_schema(&mut dataset)` before reading state. When the on-disk stamp matches the binary, this is a single metadata read with no writes; otherwise the dispatcher walks `match`-arm steps forward (1→2, 2→3, …) until the stamp matches, then proceeds with the publish. Reads stay side-effect-free. 
- **Forward-version protection**: a stamp *higher* than the binary's known version triggers a clear "upgrade omnigraph first" error. An old binary cannot clobber a newer schema by silently treating "unknown stamp" as "missing stamp". - **Idempotency**: each migration step is safe to re-run. A crash between two metadata updates inside a single step leaves the partial state; the next open re-runs the step and the second update lands. The dispatcher itself is a cheap stamp-read on the steady-state path. @@ -50,14 +50,14 @@ Adding a new on-disk shape change is one constant bump (`INTERNAL_MANIFEST_SCHEM ## On-disk layout -A repo on disk is a directory tree of Lance datasets. Each dataset follows the standard Lance layout (`_versions/`, `data/`, `_indices/`, `_refs/`); OmniGraph adds the multi-dataset coordination by keeping `__manifest/` alongside the per-type datasets. +A graph on disk is a directory tree of Lance datasets. Each dataset follows the standard Lance layout (`_versions/`, `data/`, `_indices/`, `_refs/`); OmniGraph adds the multi-dataset coordination by keeping `__manifest/` alongside the per-type datasets. ```mermaid flowchart TB classDef l1 fill:#fef3e8,stroke:#c46900,color:#000 classDef l2 fill:#e8f4fd,stroke:#1e6aa8,color:#000 - repo["repo URI
file:// or s3://bucket/prefix"]:::l2 + root["graph URI
file:// or s3://bucket/prefix"]:::l2 manifest["__manifest/
L2 catalog of sub-tables"]:::l2 nodes["nodes/{fnv1a64-hex}/
one dataset per node type"]:::l2 @@ -66,12 +66,12 @@ flowchart TB recovery["__recovery/{ulid}.json
recovery sidecars (transient)"]:::l2 refs["_refs/branches/{name}.json
graph-level branches"]:::l2 - repo --> manifest - repo --> nodes - repo --> edges - repo --> cgraph - repo --> recovery - repo --> refs + root --> manifest + root --> nodes + root --> edges + root --> cgraph + root --> recovery + root --> refs subgraph dataset[Inside each Lance dataset — L1] ds_v["_versions/{n}.manifest
per-dataset versions"]:::l1 @@ -88,10 +88,10 @@ flowchart TB **What's where:** -- **Repo root** is one directory (or S3 prefix). Everything below is part of one OmniGraph repo. +- **Graph root** is one directory (or S3 prefix). Everything below is part of one OmniGraph graph. - **`__manifest/`** is a Lance dataset whose rows describe which sub-table version is published at which graph-branch. Reading a snapshot starts here. - **`nodes/`** and **`edges/`** are sibling directories holding one Lance dataset per declared type. Names are `fnv1a64-hex` of the type name to keep paths fixed-length and case-safe. -- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 repos also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; MR-770 sweeps these in production.) +- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 graphs also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; MR-770 sweeps these in production.) - **`_graph_commit_recoveries.lance`** — one row per recovery sweep action. Joined to `_graph_commits.lance` by `graph_commit_id`; the linked commit row carries `actor_id=omnigraph:recovery`. Operators correlate recoveries with the original mutations they rolled forward / back via this join. See `crates/omnigraph/src/db/recovery_audit.rs`. - **`__recovery/{ulid}.json`** — transient sidecar files written by the four migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) before Phase B begins, deleted after Phase C succeeds. A sidecar persisting after process exit means the writer crashed in the Phase B → Phase C window; the next `Omnigraph::open` recovery sweep processes it. Steady-state directory is empty. 
See `crates/omnigraph/src/db/manifest/recovery.rs`. - **`_refs/branches/{name}.json`** is graph-level branch metadata — pointers from a branch name to the manifest version it heads. diff --git a/docs/user/transactions.md b/docs/user/transactions.md index c917b46..e4ed485 100644 --- a/docs/user/transactions.md +++ b/docs/user/transactions.md @@ -48,7 +48,7 @@ query register_employee_with_team($name: String, $age: I32, $team: String) { ```bash omnigraph change --query ./mutations.gq --name register_employee_with_team \ - --params '{"name":"Alice","age":30,"team":"Acme"}' ./repo.omni + --params '{"name":"Alice","age":30,"team":"Acme"}' ./graph.omni ``` If the second statement fails (e.g. `Acme` doesn't exist), the publisher never publishes; `Alice` is not in the database. Atomic. @@ -57,10 +57,10 @@ If the second statement fails (e.g. `Acme` doesn't exist), the publisher never p ```bash # Query 1 -omnigraph change --query ./mutations.gq --name register_employee --params '{"name":"Alice","age":30}' ./repo.omni +omnigraph change --query ./mutations.gq --name register_employee --params '{"name":"Alice","age":30}' ./graph.omni # Query 2 — runs after Query 1 has already published -omnigraph change --query ./mutations.gq --name link_to_team --params '{"name":"Alice","team":"Acme"}' ./repo.omni +omnigraph change --query ./mutations.gq --name link_to_team --params '{"name":"Alice","team":"Acme"}' ./graph.omni ``` These are **two publishes** on `main`. If Query 2 fails, Query 1's effects are already visible. There is no `ROLLBACK` for Query 1. @@ -75,32 +75,32 @@ The pattern when you need to run multiple queries — possibly across multiple c ```bash # Fork a working branch from main. -omnigraph branch create --from main onboarding/2026-04-25 ./repo.omni +omnigraph branch create --from main onboarding/2026-04-25 ./graph.omni # Run any number of mutations on the branch — each one is its own publish on the branch. # Concurrent reads of `main` are unaffected. 
omnigraph change --branch onboarding/2026-04-25 \ --query ./mutations.gq --name register_employee \ - --params '{"name":"Alice","age":30}' ./repo.omni + --params '{"name":"Alice","age":30}' ./graph.omni omnigraph change --branch onboarding/2026-04-25 \ --query ./mutations.gq --name register_employee \ - --params '{"name":"Bob","age":25}' ./repo.omni + --params '{"name":"Bob","age":25}' ./graph.omni omnigraph change --branch onboarding/2026-04-25 \ --query ./mutations.gq --name link_to_team \ - --params '{"name":"Alice","team":"Acme"}' ./repo.omni + --params '{"name":"Alice","team":"Acme"}' ./graph.omni # Inspect the branch — read queries work just like on main. omnigraph read --branch onboarding/2026-04-25 \ - --query ./queries.gq --name list_employees ./repo.omni + --query ./queries.gq --name list_employees ./graph.omni # Happy with what's on the branch? Merge it. This is one atomic publish: # `main` flips to include every commit on the branch. -omnigraph branch merge onboarding/2026-04-25 --into main ./repo.omni +omnigraph branch merge onboarding/2026-04-25 --into main ./graph.omni # OR: not happy? Throw it away. `main` is untouched. 
-# omnigraph branch delete onboarding/2026-04-25 ./repo.omni +# omnigraph branch delete onboarding/2026-04-25 ./graph.omni ``` Properties: @@ -115,16 +115,16 @@ Two agents writing to the same graph independently: ```bash # Agent A -omnigraph branch create --from main agent-a/work ./repo.omni -omnigraph change --branch agent-a/work … ./repo.omni +omnigraph branch create --from main agent-a/work ./graph.omni +omnigraph change --branch agent-a/work … ./graph.omni # … many mutations … -omnigraph branch merge agent-a/work --into main ./repo.omni +omnigraph branch merge agent-a/work --into main ./graph.omni # Agent B (running concurrently) -omnigraph branch create --from main agent-b/work ./repo.omni -omnigraph change --branch agent-b/work … ./repo.omni +omnigraph branch create --from main agent-b/work ./graph.omni +omnigraph change --branch agent-b/work … ./graph.omni # … many mutations … -omnigraph branch merge agent-b/work --into main ./repo.omni +omnigraph branch merge agent-b/work --into main ./graph.omni ``` Each agent sees a consistent snapshot of `main` at the time it forked. The first merge to `main` lands as a fast-forward (or a no-op if no concurrent change). The second merge runs three-way: rows touched by both branches surface as `MergeConflict`s for the caller to resolve. 
@@ -138,7 +138,7 @@ This is the workflow MR-797 / agentic loops are designed around: **branches are | Single query fails mid-flight | Publisher never publishes; target unchanged | Read the error, decide whether to retry | | Concurrent writers race the same `(table, branch)` | Publisher CAS rejects the loser with `ManifestConflictDetails::ExpectedVersionMismatch` | Refresh handle, retry the query | | Branch with N successful mutations, then merge fails (three-way conflict) | Each individual mutation already committed on the branch; merge surfaces `MergeConflicts` | Inspect, decide whether to keep working on the branch, abandon it (`branch_delete`), or resolve and re-merge | -| Process crashes mid-branch-workflow | Each completed mutation on the branch is durable | Re-open the repo, continue where you left off | +| Process crashes mid-branch-workflow | Each completed mutation on the branch is durable | Re-open the graph, continue where you left off | ## When to use what @@ -156,7 +156,7 @@ This is the workflow MR-797 / agentic loops are designed around: **branches are - **Cross-query atomicity on `main` without a branch.** If you don't want to fork a branch, multiple queries on `main` publish independently. There is no implicit transaction. - **Long-running interactive transactions.** No `BEGIN` over a connection. Branches are the durable equivalent. -- **Cross-graph (cross-repo) transactions.** Each repo is its own atomicity domain. +- **Cross-graph transactions.** Each graph is its own atomicity domain. - **"Pessimistic" locks** that serialize writers before they reach the storage layer. Snapshot-MVCC + publisher CAS handles concurrency optimistically; the loser retries. 
## See also diff --git a/openapi.json b/openapi.json index 8e36231..21bef1f 100644 --- a/openapi.json +++ b/openapi.json @@ -7,7 +7,7 @@ "name": "MIT", "identifier": "MIT" }, - "version": "0.4.2" + "version": "0.6.0" }, "paths": { "/branches": { diff --git a/scripts/apply-branch-protection.sh b/scripts/apply-branch-protection.sh index 910d5b6..25e93ee 100755 --- a/scripts/apply-branch-protection.sh +++ b/scripts/apply-branch-protection.sh @@ -3,7 +3,7 @@ # # Requires: # - `gh` CLI authenticated. -# - Repo-admin or org-admin permissions on ModernRelay/omnigraph. +# - Repository-admin or org-admin permissions on ModernRelay/omnigraph. # # This script is idempotent: re-running applies whatever is currently # declared in .github/branch-protection.json. The JSON file is the diff --git a/scripts/check-agents-md.sh b/scripts/check-agents-md.sh index ebb4606..abc6469 100755 --- a/scripts/check-agents-md.sh +++ b/scripts/check-agents-md.sh @@ -34,7 +34,7 @@ PY canonical=() while IFS= read -r line; do canonical+=("$line") -done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' | sort) +done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' ! 
-path 'docs/internal/*' | sort) if [[ -d docs/releases ]]; then canonical+=("docs/releases/") fi diff --git a/scripts/local-rustfs-bootstrap.sh b/scripts/local-rustfs-bootstrap.sh index a314ebd..6327f77 100755 --- a/scripts/local-rustfs-bootstrap.sh +++ b/scripts/local-rustfs-bootstrap.sh @@ -291,7 +291,7 @@ ensure_bucket() { s3api create-bucket --bucket "$BUCKET" >/dev/null 2>&1 || true } -repo_prefix_has_objects() { +graph_prefix_has_objects() { local key_count key_count="$("$AWS_BIN" --endpoint-url "$AWS_ENDPOINT_URL_S3" \ s3api list-objects-v2 \ @@ -304,27 +304,27 @@ repo_prefix_has_objects() { [ -n "$key_count" ] && [ "$key_count" != "None" ] && [ "$key_count" != "0" ] } -reset_repo_prefix() { +reset_graph_prefix() { log "Removing existing objects under $REPO_URI" "$AWS_BIN" --endpoint-url "$AWS_ENDPOINT_URL_S3" \ s3 rm "s3://$BUCKET/$PREFIX" --recursive >/dev/null } -initialize_repo() { +initialize_graph() { if "$BIN_DIR/omnigraph" snapshot "$REPO_URI" --json >/dev/null 2>&1; then - log "Reusing existing repo at $REPO_URI" + log "Reusing existing graph at $REPO_URI" return fi - if repo_prefix_has_objects; then + if graph_prefix_has_objects; then if [ "$RESET_REPO" = "1" ]; then - reset_repo_prefix + reset_graph_prefix else - die "found existing objects under $REPO_URI but could not open an Omnigraph repo there. This usually means a previous bootstrap left a partially initialized prefix. Rerun with RESET_REPO=1 to delete that prefix and recreate it, or set PREFIX to a new value." + die "found existing objects under $REPO_URI but could not open an Omnigraph graph there. This usually means a previous bootstrap left a partially initialized prefix. Rerun with RESET_REPO=1 to delete that prefix and recreate it, or set PREFIX to a new value." 
fi fi - log "Initializing repo at $REPO_URI" + log "Initializing graph at $REPO_URI" "$BIN_DIR/omnigraph" init --schema "$FIXTURE_DIR/context.pg" "$REPO_URI" log "Loading context fixture into $REPO_URI" @@ -377,7 +377,7 @@ Omnigraph local RustFS demo is up. Server: $base_url -Repo URI: +Graph URI: $REPO_URI RustFS console: @@ -414,7 +414,7 @@ main() { start_rustfs wait_for_rustfs ensure_bucket - initialize_repo + initialize_graph start_server print_summary "$(wait_for_server)" } diff --git a/scripts/update-homebrew-formula.sh b/scripts/update-homebrew-formula.sh index 6b3984c..90a5dea 100755 --- a/scripts/update-homebrew-formula.sh +++ b/scripts/update-homebrew-formula.sh @@ -6,7 +6,7 @@ usage() { Usage: update-homebrew-formula.sh [formula_path] Environment: - REPO_SLUG GitHub repo that owns the Omnigraph release + REPO_SLUG GitHub repository that owns the Omnigraph release default: ModernRelay/omnigraph EOF }