mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-21 02:28:07 +02:00
feat(engine): surface silent scalar-index fallback in indexed traversal (C6)
Add TableStore::key_column_index_coverage — a metadata-only check (no IO) of whether a `key_col IN (...)` scan will be served by the persisted BTREE or silently fall back to a full filtered scan, mirroring Lance's own decision: no BTREE on the column, or any fragment missing physical_rows (which disables scalar indices for the whole scan, lance dataset/scanner.rs create_filter_plan). execute_expand_indexed calls it once per traversal and tracing::warn!s on Degraded, so the perf cliff is observable instead of hidden behind a bench oracle. Detection-only: results are correct either way (the scan returns all rows). Closes the "no silent failures" gap the traversal best-practice audit flagged as the top deviation, and adds an IndexCoverage value a future cost-based planner can consume.
This commit is contained in:
parent
ba35fa6211
commit
5a7ab6d427
3 changed files with 106 additions and 0 deletions
|
|
@ -870,6 +870,27 @@ async fn execute_expand_indexed(
|
|||
};
|
||||
let edge_table_key = format!("edge:{}", edge_type);
|
||||
let edge_ds = snapshot.open(&edge_table_key).await?;
|
||||
|
||||
// C6 guard: surface the silent scalar-index fallback. If Lance won't route
|
||||
// the per-hop `key_col IN (...)` through the BTREE (no index, or a fragment
|
||||
// missing physical_rows), the scan is a correct but O(|E|) full scan.
|
||||
// Detection-only (metadata, no IO); never fails the query.
|
||||
match crate::table_store::TableStore::key_column_index_coverage(&edge_ds, key_col).await {
|
||||
Ok(crate::table_store::IndexCoverage::Degraded { reason }) => tracing::warn!(
|
||||
target: "omnigraph::traverse",
|
||||
edge = %edge_type,
|
||||
key_col = key_col,
|
||||
reason = %reason,
|
||||
"indexed traversal falls back to a full edge scan (results correct, perf degraded)"
|
||||
),
|
||||
Ok(crate::table_store::IndexCoverage::Indexed) => {}
|
||||
Err(e) => tracing::debug!(
|
||||
target: "omnigraph::traverse",
|
||||
error = %e,
|
||||
"index-coverage check failed; proceeding with traversal"
|
||||
),
|
||||
}
|
||||
|
||||
let max = max_hops.unwrap_or(min_hops.max(1));
|
||||
|
||||
// Per-source BFS state in string-id space (no dense interning needed).
|
||||
|
|
|
|||
|
|
@ -43,6 +43,19 @@ pub struct DeleteState {
|
|||
pub(crate) version_metadata: TableVersionMetadata,
|
||||
}
|
||||
|
||||
/// Whether a `key_col IN (...)` scan on a dataset will be served by the
|
||||
/// persisted scalar (BTREE) index, or silently fall back to a full filtered
|
||||
/// scan. Detection-only (metadata, no IO); the scan returns the correct rows
|
||||
/// either way. Surfaced by the indexed traversal path so the silent perf
|
||||
/// fallback is observable, and available to a future cost-based planner.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum IndexCoverage {
|
||||
/// The column has a usable BTREE and every fragment records `physical_rows`.
|
||||
Indexed,
|
||||
/// Lance will not use the scalar index for this scan (correct, full scan).
|
||||
Degraded { reason: String },
|
||||
}
|
||||
|
||||
/// A Lance write that has produced fragment files on object storage but is
|
||||
/// not yet committed to the dataset's manifest. The staged-write primitives
|
||||
/// are consumed by `MutationStaging` (`exec/staging.rs`,
|
||||
|
|
@ -628,6 +641,50 @@ impl TableStore {
|
|||
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||
}
|
||||
|
||||
/// Metadata-only check (no IO) of whether `scan_edges_by_endpoint` — a
|
||||
/// `key_col IN (...)` filter — on `ds` will be served by the persisted BTREE
|
||||
/// on `column`, or silently fall back to a full filtered scan. Mirrors
|
||||
/// Lance's own decision: scalar indices are disabled for the whole scan if
|
||||
/// ANY fragment lacks `physical_rows` (lance `dataset/scanner.rs`
|
||||
/// `create_filter_plan`), and are obviously unused if no BTREE on the
|
||||
/// column exists. The scan is correct (returns all rows) either way — this
|
||||
/// only surfaces the perf cliff so the indexed traversal can warn on it.
|
||||
pub async fn key_column_index_coverage(ds: &Dataset, column: &str) -> Result<IndexCoverage> {
|
||||
let Some(field_id) = ds.schema().field(column).map(|field| field.id) else {
|
||||
return Ok(IndexCoverage::Degraded {
|
||||
reason: format!("column '{}' not in schema", column),
|
||||
});
|
||||
};
|
||||
let indices = ds
|
||||
.load_indices()
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let has_btree = indices
|
||||
.iter()
|
||||
.filter(|index| !is_system_index(index))
|
||||
.filter(|index| index.fields.len() == 1 && index.fields[0] == field_id)
|
||||
.any(|index| {
|
||||
index
|
||||
.index_details
|
||||
.as_ref()
|
||||
.map(|details| details.type_url.ends_with("BTreeIndexDetails"))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
if !has_btree {
|
||||
return Ok(IndexCoverage::Degraded {
|
||||
reason: format!("no BTREE index on '{}'", column),
|
||||
});
|
||||
}
|
||||
// Same check Lance runs: a fragment missing physical_rows disables
|
||||
// scalar indices for the entire scan (all-or-nothing).
|
||||
if ds.fragments().iter().any(|f| f.physical_rows.is_none()) {
|
||||
return Ok(IndexCoverage::Degraded {
|
||||
reason: "a fragment is missing physical_rows".to_string(),
|
||||
});
|
||||
}
|
||||
Ok(IndexCoverage::Indexed)
|
||||
}
|
||||
|
||||
pub async fn count_rows(&self, ds: &Dataset, filter: Option<String>) -> Result<usize> {
|
||||
ds.count_rows(filter)
|
||||
.await
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ mod helpers;
|
|||
use arrow_array::{Array, StringArray};
|
||||
|
||||
use omnigraph::db::Omnigraph;
|
||||
use omnigraph::table_store::{IndexCoverage, TableStore};
|
||||
use omnigraph_compiler::ir::ParamMap;
|
||||
use serial_test::serial;
|
||||
|
||||
|
|
@ -61,6 +62,33 @@ async fn both_modes(db: &mut Omnigraph, queries: &str, name: &str, params: &Para
|
|||
indexed
|
||||
}
|
||||
|
||||
// The C6 index-coverage guard: `key_column_index_coverage` must report whether
|
||||
// a `key_col IN (...)` scan will use the persisted BTREE or silently full-scan.
|
||||
// Not #[serial] — it calls the helper directly and reads no env.
|
||||
#[tokio::test]
|
||||
async fn key_column_index_coverage_detects_btree_presence() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let db = init_and_load(&dir).await;
|
||||
let snap = snapshot_main(&db).await.unwrap();
|
||||
|
||||
// Edge `src` gets a BTREE from ensure_indices on load → Indexed.
|
||||
let edge_ds = snap.open("edge:Knows").await.unwrap();
|
||||
let src_cov = TableStore::key_column_index_coverage(&edge_ds, "src")
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(src_cov, IndexCoverage::Indexed, "edge src is BTREE-indexed");
|
||||
|
||||
// A node property column with no scalar index → Degraded (the warn path).
|
||||
let node_ds = snap.open("node:Person").await.unwrap();
|
||||
let age_cov = TableStore::key_column_index_coverage(&node_ds, "age")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(age_cov, IndexCoverage::Degraded { .. }),
|
||||
"non-indexed column should be Degraded, got {age_cov:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn indexed_matches_csr_one_hop_same_type() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue