diff --git a/.gitignore b/.gitignore index 919d9d8..2248d5a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ __pycache__/ *.pyc demo/*.omni/ .omnigraph-rustfs-demo/ +/docs/internal # Local-only working files (not for the public repo) .claude/ diff --git a/AGENTS.md b/AGENTS.md index 6e45aa7..0848eeb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,7 +16,7 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th `CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`. -**Version surveyed:** 0.4.2 +**Version surveyed:** 0.5.0 **Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-cli`, `omnigraph-server` **Storage substrate:** Lance 6.x (columnar, versioned, branchable) **License:** MIT diff --git a/Cargo.lock b/Cargo.lock index fcc2d7d..8e7a0c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4543,7 +4543,7 @@ dependencies = [ [[package]] name = "omnigraph-cli" -version = "0.4.2" +version = "0.5.0" dependencies = [ "assert_cmd", "clap", @@ -4565,7 +4565,7 @@ dependencies = [ [[package]] name = "omnigraph-compiler" -version = "0.4.2" +version = "0.5.0" dependencies = [ "ahash", "arrow-array", @@ -4586,7 +4586,7 @@ dependencies = [ [[package]] name = "omnigraph-engine" -version = "0.4.2" +version = "0.5.0" dependencies = [ "arc-swap", "arrow-array", @@ -4627,7 +4627,7 @@ dependencies = [ [[package]] name = "omnigraph-policy" -version = "0.4.2" +version = "0.5.0" dependencies = [ "cedar-policy", "clap", @@ -4640,7 +4640,7 @@ dependencies = [ [[package]] name = "omnigraph-server" -version = "0.4.2" +version = "0.5.0" dependencies = [ "async-trait", "aws-config", diff --git a/crates/omnigraph-cli/Cargo.toml b/crates/omnigraph-cli/Cargo.toml index fb232eb..1616366 100644 --- a/crates/omnigraph-cli/Cargo.toml +++ b/crates/omnigraph-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-cli" -version = "0.4.2" +version = "0.5.0" edition = "2024" description = "CLI for the 
Omnigraph graph database." license = "MIT" @@ -13,10 +13,10 @@ name = "omnigraph" path = "src/main.rs" [dependencies] -omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" } -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } -omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" } -omnigraph-server = { path = "../omnigraph-server", version = "0.4.2" } +omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.5.0" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.5.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.5.0" } +omnigraph-server = { path = "../omnigraph-server", version = "0.5.0" } clap = { workspace = true } color-eyre = { workspace = true } serde = { workspace = true } diff --git a/crates/omnigraph-compiler/Cargo.toml b/crates/omnigraph-compiler/Cargo.toml index 7bb8df0..f5f0043 100644 --- a/crates/omnigraph-compiler/Cargo.toml +++ b/crates/omnigraph-compiler/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-compiler" -version = "0.4.2" +version = "0.5.0" edition = "2024" description = "Schema/query compiler for Omnigraph. Zero Lance dependency." license = "MIT" diff --git a/crates/omnigraph-policy/Cargo.toml b/crates/omnigraph-policy/Cargo.toml index 3e19ce8..562442d 100644 --- a/crates/omnigraph-policy/Cargo.toml +++ b/crates/omnigraph-policy/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-policy" -version = "0.4.2" +version = "0.5.0" edition = "2024" description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum." 
license = "MIT" diff --git a/crates/omnigraph-server/Cargo.toml b/crates/omnigraph-server/Cargo.toml index b12ddfe..3edc89a 100644 --- a/crates/omnigraph-server/Cargo.toml +++ b/crates/omnigraph-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-server" -version = "0.4.2" +version = "0.5.0" edition = "2024" description = "HTTP server for the Omnigraph graph database." license = "MIT" @@ -19,9 +19,9 @@ default = [] aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"] [dependencies] -omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" } -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } -omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" } +omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.5.0" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.5.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.5.0" } axum = { workspace = true } clap = { workspace = true } color-eyre = { workspace = true } diff --git a/crates/omnigraph/Cargo.toml b/crates/omnigraph/Cargo.toml index a3cc5df..7332989 100644 --- a/crates/omnigraph/Cargo.toml +++ b/crates/omnigraph/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-engine" -version = "0.4.2" +version = "0.5.0" edition = "2024" description = "Runtime engine for the Omnigraph graph database." 
license = "MIT" @@ -16,8 +16,8 @@ default = [] failpoints = ["dep:fail", "fail/failpoints"] [dependencies] -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } -omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.5.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.5.0" } lance = { workspace = true } lance-datafusion = { workspace = true } datafusion = { workspace = true } @@ -51,7 +51,7 @@ chrono = { workspace = true } arc-swap = { workspace = true } [dev-dependencies] -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.5.0" } tokio = { workspace = true } lance-namespace-impls = { workspace = true } serial_test = "3" diff --git a/docs/releases/v0.5.0.md b/docs/releases/v0.5.0.md new file mode 100644 index 0000000..16e284e --- /dev/null +++ b/docs/releases/v0.5.0.md @@ -0,0 +1,171 @@ +# Omnigraph v0.5.0 + +Omnigraph v0.5.0 is a substrate, security, and migration-safety release. It +jumps the storage substrate from Lance 4 to Lance 6.0.1 (DataFusion 52 → 53, +Arrow 57 → 58), introduces engine-wide Cedar policy enforcement on every +authoring path, and ships a structured schema-lint v1 chassis with +code-tagged diagnostics, soft drops, and an explicit `--allow-data-loss` +flag for destructive migrations. + +## Highlights + +- **Lance 6.0.1 substrate**: bump from Lance 4.0.0 → 6.0.1, DataFusion 52 → + 53, Arrow 57 → 58. New optimizer rules (vectorized `IN`-list eq kernel, + `PhysicalExprSimplifier`, push-limit-into-hash-join, CASE-NULL shortcut) + reach predicates that flow through the engine. `lance-tokenizer` replaces + tantivy internally; FTS behavior preserved. 
+- **Cedar policy engine**: a new `omnigraph-policy` crate wires + `Omnigraph::enforce(action, scope, actor)` into every `_as` writer + (`mutate_as`, `load_as`, `apply_schema_as`, `branch_create_as`, + `branch_merge_as`, `branch_delete_as`, plus the load and change + variants). The HTTP server defaults to deny-all when no Cedar policy is + configured; a YAML policy file is required to enable writes. Actor + identity comes only from signed token claims — clients cannot set actor + identity directly. +- **Schema lint v1 chassis**: diagnostics now carry stable codes of the form + `OG-XXX-NNN` instead of free-form messages. `omnigraph schema plan` and + `apply` understand soft drops on properties and types — destructive drops + require the new `--allow-data-loss` flag (Hard mode) at the CLI and an + equivalent JSON flag over HTTP. +- **Structured filter pushdown**: query-language predicates lower to + DataFusion `Expr` and push down through Lance's `Scanner::filter_expr` + instead of being flattened to SQL strings. This unlocks `CompOp::Contains` + pushdown (via `array_has`), which previously fell through to in-memory + post-scan filtering, and lets the DataFusion 53 optimizer rules above act + on our predicates. +- **HTTP `allow_data_loss` parity**: the destructive-drop guard now exists + on both the CLI (`--allow-data-loss`) and HTTP (`allow_data_loss: true` in + the schema-apply request body). +- **Inline query strings on CLI and HTTP**: `omnigraph read` / + `omnigraph mutate` and the corresponding HTTP endpoints accept inline + `.gq` source, not just a file path. Easier ad-hoc queries, clearer + request logs. +- **Browser CORS layer**: optional CORS layer on `omnigraph-server` for + browser-based UIs, gated by `OMNIGRAPH_CORS_ORIGINS`. +- **Merge-insert dup-rowid fix**: Lance's `MergeInsertBuilder` could surface + spurious `"Ambiguous merge inserts"` errors on sequential merges against + rows previously rewritten by `merge_insert`. 
The engine now opts into + `SourceDedupeBehavior::FirstSeen` with a `check_batch_unique_by_keys` + fail-fast precondition that guarantees source-side dedup happens before + Lance sees the batch. +- **Branch-merge error-path recovery**: a branch merge that failed + mid-flight could leave the in-process coordinator pointing at a stale + active branch. The error path now restores the prior coordinator, + matching the success path's invariant. +- **Branch merge with blob columns**: external blob URIs are now + materialized correctly during branch merge instead of being dropped or + pointing at the source branch. +- **Lance API surface guards**: a new test file + (`crates/omnigraph/tests/lance_surface_guards.rs`) pins eight specific + Lance API surfaces (`LanceError::TooMuchWriteContention`, + `ManifestLocation` fields, `MergeInsertBuilder` return shape, + `WriteParams::default`, `compact_files` signature, etc.) so the next + Lance bump fails compile or runtime on any silent drift rather than + producing wrong-state recovery in production. + +## Behavior changes + +- **On-disk format unchanged**: existing v0.4.2 datasets open unchanged. + The Lance file format pin stays at V2_2 (required by Lance's blob v2 + feature). +- **`omnigraph-server` defaults to deny-all under `--policy`**: starting a + server with the policy feature enabled but no Cedar YAML policy + configured rejects every write. Operators must supply a policy file to + authorize anything. +- **Schema-lint diagnostics carry stable codes**: messages now lead with + `OG-XXX-NNN`. CI parsers or tooling that keyed off the v0.4.2 free-form + text need to switch to code-based matching. +- **Destructive schema drops require `--allow-data-loss`**: dropping a + property or type returns a structured diagnostic by default. + `omnigraph schema apply --allow-data-loss` (CLI) or + `{"allow_data_loss": true}` (HTTP) opts into Hard mode. 
+- **`HashJoinExec` null-aware semantics on anti-join**: a side effect of + the DataFusion 53 bump — `NOT IN` semantics under null-valued anti-join + columns are now correct per the SQL standard. Queries that depended on the + prior behavior were incorrect and may now return different results. + +## Upgrade Notes + +### Migration + +- No data migration. v0.4.2 repos open directly on v0.5.0. + +### Clients + +- HTTP and SDK clients should switch any string-matching schema-lint + parsing to code-based matching against the `OG-XXX-NNN` prefix. +- Clients exercising destructive schema drops (`DropProperty`, `DropType`) + must add the `allow_data_loss` request field (HTTP) or + `--allow-data-loss` flag (CLI). Default is soft-drop-or-reject. +- Clients consuming `mutate_as` / `load_as` / `apply_schema_as` / branch + authoring APIs now flow through the policy enforcer. Anything bypassing + authorization on v0.4.2 will be rejected on v0.5.0 once a policy is + configured. + +### Operators + +- Configure a Cedar policy YAML for production servers before enabling + writes; deny-all is the new default. The `omnigraph policy validate` / + `test` / `explain` CLI commands are unchanged. +- Bearer tokens continue to be the actor-identity source; review the + signed-token-claim-only invariant in `docs/dev/invariants.md` if you've + built custom authentication. +- If your local CI uses RustFS for S3-compatible storage testing, our CI + pins `rustfs/rustfs:1.0.0-beta.3` (the last known-good tag before the + upstream credentials-policy change). Mirror the pin or set + `RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true` for the new image + versions. + +## Tests added or strengthened + +- `crates/omnigraph/tests/lance_surface_guards.rs` — 8 named guards pinning + Lance API surfaces against silent drift on future bumps. +- `crates/omnigraph/tests/policy_engine_chassis.rs` — engine-level policy + enforcement coverage; complements the existing HTTP policy tests. 
+- Policy chassis e2e gap-fills — branch-merge, branch-create, branch-delete + policy paths now have explicit end-to-end tests over HTTP and CLI. +- Merge-pair truth table — exhaustive op-variant matrix for three-way + merge across `noop`, `addNode`, `removeNode`, `addEdge`, `removeEdge`, + `setProperty`, `dropProperty`, `addLabel`, `removeLabel`; the build + fails to compile when a new op variant is added without dispositioning + every pairing. +- Merge-insert: regression tests for the dup-rowid bug class on the load + surface (`load_merge_repeated_against_overlapping_keys_succeeds`), the + update surface (`second_sequential_update_on_same_row_succeeds`), and the + upstream-Lance-gap canary + (`load_merge_window_2_documents_upstream_lance_gap`). +- Maintenance + destructive-migration coverage — `omnigraph optimize` / + `cleanup` boundary cases, plus schema-apply soft-drop and Hard-mode + paths. +- Stable-row-id preservation across `stage_overwrite` — pins the invariant + that staged overwrites carry stable row IDs through to the committed + fragment set. +- `CompOp::Contains` pushdown regression + (`ir_filter_with_list_contains_pushes_down`) — pins the new structured + Expr pushdown path that retired the in-memory fallback. + +## Included Changes + +- Lance 4 → 6.0.1, DataFusion 52 → 53, Arrow 57 → 58 substrate upgrade. +- `omnigraph-policy` crate with engine-wide Cedar enforcement and + signed-token-claim-only actor identity. +- Schema-lint v1 chassis with `OG-XXX-NNN` codes, soft `DropProperty` / + `DropType` semantics, and `--allow-data-loss` for Hard mode. +- HTTP `allow_data_loss` request field parity with the CLI flag. +- Structured DataFusion `Expr` filter pushdown via + `Scanner::filter_expr`, with `CompOp::Contains` lowered through + `array_has`. +- Inline `.gq` source acceptance on CLI and HTTP read/mutate endpoints. +- Optional CORS layer on `omnigraph-server` for browser UIs. 
+- Bug fixes: merge-insert dup-rowid (FirstSeen + uniqueness precondition), + branch-merge coordinator restore on error, blob-column materialization + during branch merge. +- New Lance API surface-guard test file as the canary for future Lance + bumps. +- Recovery-sidecar coverage extended across the four write paths + (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, + `ensure_indices`) with failpoint regression tests. +- CI: pinned `rustfs/rustfs:1.0.0-beta.3` after the upstream `:latest` + introduced a credentials-policy change. +- Version bump to `0.5.0` across workspace crates, `Cargo.lock`, + `openapi.json`, and the `AGENTS.md` surveyed version. diff --git a/openapi.json b/openapi.json index b0ed1f2..2a8b9a9 100644 --- a/openapi.json +++ b/openapi.json @@ -7,7 +7,7 @@ "name": "MIT", "identifier": "MIT" }, - "version": "0.4.2" + "version": "0.5.0" }, "paths": { "/branches": { diff --git a/scripts/check-agents-md.sh b/scripts/check-agents-md.sh index ebb4606..abc6469 100755 --- a/scripts/check-agents-md.sh +++ b/scripts/check-agents-md.sh @@ -34,7 +34,7 @@ PY canonical=() while IFS= read -r line; do canonical+=("$line") -done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' | sort) +done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' ! -path 'docs/internal/*' | sort) if [[ -d docs/releases ]]; then canonical+=("docs/releases/") fi