Merge pull request #85 from ModernRelay/ragnorc/survey-state

engine: pin stable-row-id preservation through stage_overwrite
This commit is contained in:
Ragnor Comerford 2026-05-12 17:24:55 -07:00 committed by GitHub
commit 53d41a30b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 61 additions and 1 deletions

View file

@ -953,8 +953,19 @@ impl TableStore {
"stage_overwrite called with empty batch".to_string(),
));
}
// `enable_stable_row_ids: true` is defensive — empirically Lance 4.0.0
// preserves the source dataset's flag through `Operation::Overwrite`
// when WriteParams omits it (pinned by
// `stage_overwrite_preserves_stable_row_ids` in tests/staged_writes.rs),
// but setting it explicitly matches the public `overwrite_dataset`
// path and keeps the invariant documented at every Overwrite site
// (see docs/storage.md "Stable row IDs"). Setting it on an existing
// dataset that was created without stable row IDs is a no-op per
// Lance's row-id-lineage spec, so this stays correct for legacy
// datasets.
let params = WriteParams {
mode: WriteMode::Overwrite,
enable_stable_row_ids: true,
allow_external_blob_outside_bases: true,
..Default::default()
};

View file

@ -532,6 +532,54 @@ async fn stage_overwrite_does_not_advance_head_until_commit() {
assert_eq!(collect_ids(&after), vec!["zoe"]);
}
/// Regression guard: a `stage_overwrite` followed by `commit_staged` must
/// leave the table's `enable_stable_row_ids` flag switched on.
///
/// `schema_apply` relies on `stage_overwrite` to rewrite a table whenever
/// an additive migration touches data. Were the rewrite to drop the flag,
/// each such migration would quietly turn stable row IDs off for the
/// rewritten tables, and consumers that rely on `_rowid` stability
/// (change-feed validators, index reconcilers) would see silent
/// corruption.
///
/// Pinned invariant — see `docs/storage.md` "Stable row IDs".
#[tokio::test]
async fn stage_overwrite_preserves_stable_row_ids() {
    // Resolve the scratch directory once; both the dataset URI and the
    // store are rooted at the same path string.
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path().to_str().unwrap();
    let table_uri = format!("{}/people.lance", root);
    let store = TableStore::new(root);

    // Precondition check: the bootstrap helper is expected to create the
    // dataset with stable row IDs enabled (ADR 0001). Asserting it here
    // means a regression in the helper fails loudly instead of making the
    // real assertion below vacuous.
    let seed_rows = person_batch(&[("alice", Some(30))]);
    let source = TableStore::write_dataset(&table_uri, seed_rows)
        .await
        .unwrap();
    assert!(
        source.manifest.uses_stable_row_ids(),
        "precondition: TableStore::write_dataset must create datasets \
        with stable row IDs enabled see ADR 0001"
    );

    // Stage a full overwrite, then commit it on top of the source version.
    let replacement_rows = person_batch(&[("zoe", Some(99))]);
    let pending = store
        .stage_overwrite(&source, replacement_rows)
        .await
        .unwrap();
    let source_handle = Arc::new(source.clone());
    let committed = store
        .commit_staged(source_handle, pending.transaction)
        .await
        .unwrap();

    // The committed manifest must still report stable row IDs.
    assert!(
        committed.manifest.uses_stable_row_ids(),
        "stage_overwrite + commit_staged must preserve \
        enable_stable_row_ids from the source dataset. If this fails, \
        schema_apply has been silently disabling stable row IDs on \
        every additive migration that triggers a table rewrite. Fix \
        is in WriteParams at table_store.rs::stage_overwrite see \
        ADR 0001."
    );
}
/// `stage_overwrite` semantically REPLACES every committed fragment.
/// `removed_fragment_ids` lists every committed fragment so
/// `scan_with_staged` shows only the staged rows (not committed + staged).