fix(engine): IndexCoverage reports Degraded for uncovered fragments

key_column_index_coverage checked BTREE-exists + physical_rows but not that the
index actually covers the current fragments. Since edge-index creation is skipped
once a BTREE exists, fragments appended later stay unindexed while coverage still
reported Indexed — so the cost chooser priced a partly-full scan as fully indexed.

Compare the BTREE's fragment_bitmap (public on lance_table IndexMetadata) against
the dataset's current fragment ids; report Degraded when any are uncovered. A None
bitmap means Lance can't report coverage — don't over-degrade. Results are
unaffected (the scan returns unindexed-fragment rows either way); this corrects
the cost signal.

Test: a freshly-loaded edge BTREE is Indexed; after appending an edge the new
fragment is uncovered → Degraded. Surface guard pins IndexMetadata.fragment_bitmap.
This commit is contained in:
Ragnor Comerford 2026-06-09 17:22:36 +02:00
parent edca75457e
commit cca7da5373
No known key found for this signature in database
3 changed files with 69 additions and 5 deletions

View file

@ -659,22 +659,22 @@ impl TableStore {
.load_indices()
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
let has_btree = indices
let btree = indices
.iter()
.filter(|index| !is_system_index(index))
.filter(|index| index.fields.len() == 1 && index.fields[0] == field_id)
.any(|index| {
.find(|index| {
index
.index_details
.as_ref()
.map(|details| details.type_url.ends_with("BTreeIndexDetails"))
.unwrap_or(false)
});
if !has_btree {
let Some(btree) = btree else {
return Ok(IndexCoverage::Degraded {
reason: format!("no BTREE index on '{}'", column),
});
}
};
// Same check Lance runs: a fragment missing physical_rows disables
// scalar indices for the entire scan (all-or-nothing).
if ds.fragments().iter().any(|f| f.physical_rows.is_none()) {
@ -682,6 +682,27 @@ impl TableStore {
reason: "a fragment is missing physical_rows".to_string(),
});
}
// An index only covers the fragments it was built over; fragments
// appended afterward (edge-index creation is skipped once a BTREE exists)
// are scanned unindexed. If any CURRENT fragment is absent from the
// index's `fragment_bitmap`, the scan is partly a full scan — so the
// chooser must not price it as fully indexed. A `None` bitmap means Lance
// can't report coverage; don't over-degrade in that case.
if let Some(bitmap) = btree.fragment_bitmap.as_ref() {
let uncovered = ds
.fragments()
.iter()
.filter(|f| !bitmap.contains(f.id as u32))
.count();
if uncovered > 0 {
return Ok(IndexCoverage::Degraded {
reason: format!(
"{} fragment(s) not covered by the index on '{}'",
uncovered, column
),
});
}
}
Ok(IndexCoverage::Indexed)
}

View file

@ -402,15 +402,20 @@ async fn _compile_scalar_index_coverage_surface() -> lance::Result<()> {
// disables the scalar index for the entire scan.
for frag in ds.fragments().iter() {
let _physical_rows: Option<usize> = frag.physical_rows;
// `key_column_index_coverage` checks each current fragment id against the
// index `fragment_bitmap`.
let _id: u64 = frag.id;
}
// The index sniff: BTREE presence is detected by single-field index whose
// details type_url ends with "BTreeIndexDetails".
// details type_url ends with "BTreeIndexDetails". The fragment coverage check
// reads `fragment_bitmap` (Option<RoaringBitmap>) and calls `.contains(u32)`.
let indices = ds.load_indices().await?;
for index in indices.iter() {
let _fields: &Vec<i32> = &index.fields;
if let Some(details) = index.index_details.as_ref() {
let _type_url: &str = details.type_url.as_str();
}
let _covered: Option<bool> = index.fragment_bitmap.as_ref().map(|b| b.contains(0u32));
}
Ok(())
}

View file

@ -97,6 +97,44 @@ async fn key_column_index_coverage_detects_btree_presence() {
);
}
// An edge appended after the BTREE was built lands in a new fragment that the
// index does not cover (edge-index creation is skipped once a BTREE exists). The
// scan is then partly a full scan, so coverage must report `Degraded` — otherwise
// the cost chooser would price an unindexed-in-part scan as fully indexed.
// (Results stay correct regardless — `indexed_finds_unindexed_appended_edge`.)
#[tokio::test]
async fn coverage_degrades_for_appended_unindexed_fragment() {
let dir = tempfile::tempdir().unwrap();
let mut db = init_and_load(&dir).await;
// Fresh load: the Knows BTREE covers every fragment → Indexed.
let snap = snapshot_main(&db).await.unwrap();
let edge_ds = snap.open("edge:Knows").await.unwrap();
assert_eq!(
TableStore::key_column_index_coverage(&edge_ds, "src").await.unwrap(),
IndexCoverage::Indexed,
"freshly-loaded edge BTREE covers all fragments"
);
// Append an edge → a new, unindexed fragment outside the index fragment_bitmap.
mutate_main(
&mut db,
MUTATION_QUERIES,
"add_friend",
&params(&[("$from", "Alice"), ("$to", "Diana")]),
)
.await
.unwrap();
let snap2 = snapshot_main(&db).await.unwrap();
let edge_ds2 = snap2.open("edge:Knows").await.unwrap();
let cov = TableStore::key_column_index_coverage(&edge_ds2, "src").await.unwrap();
assert!(
matches!(cov, IndexCoverage::Degraded { .. }),
"appended unindexed fragment must degrade coverage, got {cov:?}"
);
}
#[tokio::test]
#[serial]
async fn indexed_matches_csr_one_hop_same_type() {