diff --git a/crates/omnigraph/src/db/manifest/migrations.rs b/crates/omnigraph/src/db/manifest/migrations.rs index e2801fe..a49135b 100644 --- a/crates/omnigraph/src/db/manifest/migrations.rs +++ b/crates/omnigraph/src/db/manifest/migrations.rs @@ -113,20 +113,27 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()> /// so the merge-insert conflict resolver enforces row-level CAS at commit /// time, then bump the stamp. /// -/// Both steps are idempotent under retry: re-applying the field annotation -/// at its current value is a no-op-ish bump in Lance, and the stamp is a -/// simple key-value write. A crash between the two leaves the field set -/// without a stamp; the next open re-runs this fn and only the stamp lands. +/// Idempotent under crash-retry by construction. Lance 7 makes the unenforced +/// primary key **immutable once set**: any write that touches the reserved +/// `lance-schema:unenforced-primary-key` field metadata after the PK is set +/// errors ("cannot be changed once set", `lance::dataset::transaction`), even +/// re-applying the same value. A crash between the field-set and the stamp +/// bump leaves the field set without a stamp, so the next open re-enters here +/// with the PK already present — we must therefore set it only when absent. +/// (Fresh graphs bake the PK into `manifest_schema()` at init and never run +/// this migration; only genuine pre-v0.4.0 graphs do.) async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> { - dataset - .update_field_metadata() - .update( - "object_id", - [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())], - ) - .map_err(|e| OmniError::Lance(e.to_string()))? - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; + if dataset.schema().unenforced_primary_key().is_empty() { + dataset + .update_field_metadata() + .update( + "object_id", + [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())], + ) + .map_err(|e| OmniError::Lance(e.to_string()))? + .await + .map_err(|e| OmniError::Lance(e.to_string()))?; + } set_stamp(dataset, 2).await } diff --git a/crates/omnigraph/src/db/manifest/tests.rs b/crates/omnigraph/src/db/manifest/tests.rs index 885a2a8..b396866 100644 --- a/crates/omnigraph/src/db/manifest/tests.rs +++ b/crates/omnigraph/src/db/manifest/tests.rs @@ -336,40 +336,66 @@ async fn test_directory_namespace_direct_publish_cannot_replace_native_omnigraph .await .unwrap(); - let versions = namespace - .list_table_versions(ListTableVersionsRequest { - id: Some(vec!["node:Person".to_string()]), - descending: Some(true), - ..Default::default() - }) - .await - .unwrap(); - assert_eq!( - versions.versions[0].version as u64, - person_entry.table_version + // Lance 7: the native `DirectoryNamespace` no longer recognizes omnigraph's + // manifest-tracked tables. `check_table_status` reports the table absent + // (`lance-namespace-impls` dir.rs), so list / describe / create_table_version + // all return `TableNotFound`. The native path is fully decoupled from + // omnigraph's manifest authority: it cannot enumerate, inspect, or publish + // over omnigraph's tables. (Pre-v7 the native namespace could still *see* the + // published version but never replace the write path; v7 only widens the gap.) + let assert_table_not_found = |what: &str, dbg: String| { + assert!( + dbg.contains("TableNotFound") && dbg.contains("node:Person"), + "{what}: expected TableNotFound for node:Person, got: {dbg}" + ); + }; + assert_table_not_found( + "list_table_versions", + format!( + "{:?}", + namespace + .list_table_versions(ListTableVersionsRequest { + id: Some(vec!["node:Person".to_string()]), + descending: Some(true), + ..Default::default() + }) + .await + .unwrap_err() + ), + ); + assert_table_not_found( + "describe_table_version", + format!( + "{:?}", + namespace + .describe_table_version(DescribeTableVersionRequest { + id: Some(vec!["node:Person".to_string()]), + version: Some(person_version as i64), + ..Default::default() + }) + .await + .unwrap_err() + ), + ); + assert_table_not_found( + "create_table_version", + format!( + "{:?}", + namespace + .create_table_version(version_metadata.to_create_table_version_request( + "node:Person", + person_version, + 1, + None, + )) + .await + .unwrap_err() + ), ); - let err = namespace - .describe_table_version(DescribeTableVersionRequest { - id: Some(vec!["node:Person".to_string()]), - version: Some(person_version as i64), - ..Default::default() - }) - .await - .unwrap_err(); - assert!(err.to_string().contains("not found")); - - let err = namespace - .create_table_version(version_metadata.to_create_table_version_request( - "node:Person", - person_version, - 1, - None, - )) - .await - .unwrap_err(); - assert!(err.to_string().contains("already exists")); - + // omnigraph's manifest stays authoritative: refresh ignores the direct + // `person_ds.append` above (it was never manifest-published), so the row + // count stays 0 and the version is unchanged. mc.refresh().await.unwrap(); assert_eq!( mc.snapshot().entry("node:Person").unwrap().table_version, diff --git a/docs/dev/lance.md b/docs/dev/lance.md index 2ad1273..daa0435 100644 --- a/docs/dev/lance.md +++ b/docs/dev/lance.md @@ -156,7 +156,7 @@ If a future need pulls one of these into scope, add a row to the matching domain When Lance ships a major release that changes any of the above (file format bump, new index type, transaction semantics change, new branching primitive), refresh this index in the same change as the omnigraph upgrade. Stale Lance pointers are worse than no pointers. -### Last alignment audit: 2026-06-14 (Lance 7.0.0 upstream; omnigraph pinned at 7.0.0) +### Last alignment audit: 2026-06-15 (Lance 7.0.0 upstream; omnigraph pinned at 7.0.0) Migration from Lance 6.0.1 → 7.0.0 landed in this cycle. **Arrow stayed 58, DataFusion stayed 53** (no change) — the only transitive bump is `object_store` 0.12.5 → 0.13.2. 141 upstream commits reviewed (6.0.1 → 7.0.0); no fixes lost (the 6.0.x release-branch backports are all forward-ported into 7.0.0). Behavior-affecting findings: @@ -166,8 +166,10 @@ Migration from Lance 6.0.1 → 7.0.0 landed in this cycle. **Arrow stayed 58, Da - **BTREE range-query bound inclusiveness fixed** (PR #6796, issue #6792): `x <= hi AND x > lo` returned the wrong boundary row on 6.0.1. omnigraph today builds BTREE only on string `@key` columns (`id`/`src`/`dst`) and queries them by equality/IN, not range, so its *current* query patterns almost certainly never hit this bug — but the corrected boundary semantics are a contract we rely on the moment a BTREE-range path appears (BTREE-on-properties via the index-type tickets, or a range-on-key query). Pinned by `lance_surface_guards.rs::btree_range_query_boundary_is_correct` (reproduces #6792's 5-row + BTREE shape). - **`WriteParams::auto_cleanup` default flipped from on (every-20-commits) to `None`** (PR #6755). On 6.0.1 the on-by-default hook could GC versions the `__manifest` pins for snapshots/time-travel. omnigraph owns cleanup explicitly (`optimize.rs::cleanup_all_tables`). Two parts to the fix, because `auto_cleanup` is **create-time config only and has no effect on existing datasets** (Lance `write.rs` docs): (1) `auto_cleanup: None` at all 11 `WriteParams` sites so *new* datasets store no cleanup config; (2) — the load-bearing half — `skip_auto_cleanup: true` on every commit path, because graphs created **before** the bump still carry the on-config in their datasets, and Lance's hook fires off the *dataset's stored* config at commit time (`io/commit.rs`: `if !commit_config.skip_auto_cleanup`). So the staged commit path (`commit_staged` → `CommitBuilder::with_skip_auto_cleanup(true)`), the `__manifest` publisher (`MergeInsertBuilder::skip_auto_cleanup(true)`), and the direct `WriteParams` paths all skip the hook. Without this, an upgraded graph would still auto-cleanup and delete `__manifest`-pinned versions. Pinned by `lance_surface_guards.rs::skip_auto_cleanup_suppresses_version_gc` (negative control + with-skip survival). - **Lance #6658 SHIPPED in 7.0.0** (`DeleteBuilder::execute_uncommitted`, exposed via PR #6781) → MR-A (migrate `delete_where` to the staged two-phase API, retire the parse-time D2 rule) is now **unblocked**, tracked separately (dev-graph `iss-950`). The bump itself keeps `delete_where` inline; the `_compile_delete_result_field_shape` guard is left untouched until MR-A. +- **The unenforced primary key is now immutable once set** (`lance::dataset::transaction`, ~L2472–2480: `if !primary_key_before.is_empty() && (writes_primary_key || primary_key_after != primary_key_before) → "the unenforced primary key is a reserved key and cannot be changed once set"`). omnigraph marks `__manifest.object_id` as the unenforced PK (`lance-schema:unenforced-primary-key`) for merge-insert row-level CAS — baked into `manifest_schema()` at init, and added by the `migrate_v1_to_v2` internal-schema migration for pre-v0.4.0 graphs. The migration relied on Lance 6's idempotent re-apply for crash-recovery (a crash after the field-set but before the stamp bump re-enters the migration with the PK already present); under v7 that re-apply errors, so a real v1 graph could never finish migrating. Fixed by guarding the set with `schema().unenforced_primary_key().is_empty()` (`db/manifest/migrations.rs`). Regression: `db::manifest::tests::test_publish_migrates_pre_stamp_manifest_to_current_version` (was red under v7). +- **Native `DirectoryNamespace` no longer recognizes omnigraph's manifest-tracked tables** (`lance-namespace-impls` dir.rs ~L1310): `list/describe/create_table_version` route through `check_table_status`, which reports an omnigraph table absent → `TableNotFound`. omnigraph production never uses Lance's native namespace (its publisher writes `__manifest` directly via merge_insert; its own `namespace.rs` impls are custom), so this is test-only — the `test_directory_namespace_direct_publish_cannot_replace_native_omnigraph_write_path` surface guard was realigned to the v7 behavior (it now asserts the native namespace is fully decoupled, which only strengthens the guard's thesis). - **Still NOT fixed in 7.0.0:** vector-index two-phase (Lance #6666 open) — `create_vector_index` inline residual retained; blob-column compaction — `compact_files_still_fails_on_blob_columns` guard still red on a fix, `optimize` still skips blob tables behind `LANCE_SUPPORTS_BLOB_COMPACTION`. -- **No Lance-API surface omnigraph uses changed 6.0.1 → 7.0.0** (verified by a clean engine build; the only compile break was object_store). `CleanupPolicy`, `WriteParams` (apart from the `auto_cleanup` default), `CompactionOptions`, the namespace models (resolved via `lance-namespace-reqwest-client` 0.7.7, unchanged across the bump), `Operation`, `ManifestLocation`, and `MergeInsertBuilder` shapes are all stable. +- **No Lance API surface omnigraph uses changed at *compile* time** (the only compile break was object_store) — but **two runtime behaviors did** (the unenforced-PK immutability and the native-namespace `TableNotFound`, above), each caught by the full engine test suite rather than the build. `CleanupPolicy`, `WriteParams` (apart from the `auto_cleanup` default), `CompactionOptions`, the namespace models (resolved via `lance-namespace-reqwest-client` 0.7.7, unchanged across the bump), `Operation`, `ManifestLocation`, and `MergeInsertBuilder` shapes are all stable. Lesson: a clean build is not a clean alignment — run `cargo test --workspace` before declaring a Lance bump done. Bump this date stanza on the next alignment pass.