mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-18 02:24:27 +02:00
fix(engine): v1→v2 migration verifies the unenforced PK is object_id, not just non-empty (#239)
Greptile follow-up (#236): `migrate_v1_to_v2` guarded the field-set with `unenforced_primary_key().is_empty()`, which skips the set whenever *any* field is the PK — including the (corrupt/unexpected) case where a field other than `object_id` carries it. That would silently leave merge-insert row-level CAS keyed on the wrong column, and Lance 7 forbids changing the PK afterward. Match on the specific PK field instead: `["object_id"]` is the idempotent crash-recovery no-op, `[]` sets it (the genuine pre-v0.4.0 first migration), and any other PK refuses loudly. Defensive — Lance won't let a fresh graph reach the error branch — but correct by construction. The idempotent re-entry path stays covered by test_publish_migrates_pre_stamp_manifest_to_current_version (28 manifest tests green). Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
bc2a989a7b
commit
9a607abdec
1 changed files with 30 additions and 10 deletions
|
|
@ -123,16 +123,36 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
|
|||
/// (Fresh graphs bake the PK into `manifest_schema()` at init and never run
|
||||
/// this migration; only genuine pre-v0.4.0 graphs do.)
|
||||
async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
|
||||
if dataset.schema().unenforced_primary_key().is_empty() {
|
||||
dataset
|
||||
.update_field_metadata()
|
||||
.update(
|
||||
"object_id",
|
||||
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
// The guard is over the *specific* field, not just "any PK is set": skipping
|
||||
// when `object_id` is already the PK is the idempotent crash-recovery path,
|
||||
// but a manifest whose PK is some *other* field has the wrong CAS key — and
|
||||
// Lance 7 won't let us change it. Refuse loudly rather than silently leave
|
||||
// merge-insert conflict detection keyed on the wrong column.
|
||||
let pk_fields: Vec<&str> = dataset
|
||||
.schema()
|
||||
.unenforced_primary_key()
|
||||
.iter()
|
||||
.map(|field| field.name.as_str())
|
||||
.collect();
|
||||
match pk_fields.as_slice() {
|
||||
["object_id"] => {} // already migrated (or a crash re-entry) — idempotent no-op
|
||||
[] => {
|
||||
dataset
|
||||
.update_field_metadata()
|
||||
.update(
|
||||
"object_id",
|
||||
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
}
|
||||
other => {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"__manifest unenforced primary key is {other:?}, expected [\"object_id\"]; \
|
||||
refusing to migrate a manifest with an unexpected CAS key"
|
||||
)));
|
||||
}
|
||||
}
|
||||
set_stamp(dataset, 2).await
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue