mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-24 02:38:06 +02:00
Greptile P2 on #285: the #283 tests prove correctness (right rows, type-level coercion) but not that a camelCase @index equality actually reaches the scalar-index path — a result-only test passes on a silent full-scan fallback, exactly the gap testing.md warns about and the bug-case-fix.md validation checklist (step 5) promised to close. Add lance-surface Guard 20 (mirrors Guard 16): build a BTREE on a camelCase column and assert the scan plan contains `ScalarIndexQuery` under the fix's case-preserving `ident()` expr, and that the pre-fix `col()` expr fails to plan (it normalizes `repoName` → a nonexistent `reponame`). A regression that breaks camelCase index routing — or reverts to `col()` — turns this red instead of degrading to a full scan. The existing e2e (`camelcase_property_filter_executes`) already guards the engine call-site (a `col()` revert errors there). Claude-Session: https://claude.ai/code/session_01FQ1Hf4eXLsJmeLUkTYBEw7 Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
57348cf7fa
commit
2d34d7c432
1 changed files with 77 additions and 0 deletions
|
|
@ -991,3 +991,80 @@ async fn unenforced_primary_key_is_immutable_once_set() {
|
|||
— revisit migrate_v1_to_v2's field-guard and re-pin docs/dev/lance.md."
|
||||
);
|
||||
}
|
||||
|
||||
// --- Guard 20: camelCase @index equality routes to the scalar index (#283) ----
|
||||
//
|
||||
// The #283 read-pushdown fix builds the filter column with datafusion `ident()`
|
||||
// (case-preserving) instead of `col()` (SQL identifier normalization, which
|
||||
// lowercases an unquoted name). The correctness tests in literal_filters.rs /
|
||||
// writes.rs prove the right rows come back, but a result-only assertion also
|
||||
// passes on a full-scan fallback — exactly the gap testing.md warns about. This
|
||||
// guard pins the *plan*: an equality on a camelCase BTREE column must compile to
|
||||
// a `ScalarIndexQuery` under the fix's expr shape, and must NOT under the old
|
||||
// `col()` shape (which lowercases `repoName` → a nonexistent `reponame`). A
|
||||
// regression that breaks camelCase index routing — or a revert to `col()` —
|
||||
// turns this red instead of silently degrading to a full scan.
|
||||
#[tokio::test]
|
||||
async fn camelcase_index_equality_routes_to_scalar_index() {
|
||||
use datafusion::physical_plan::displayable;
|
||||
use datafusion::prelude::{col, ident, lit};
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().join("camelcase_index.lance");
|
||||
let uri = uri.to_str().unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("repoName", DataType::Utf8, false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
|
||||
Arc::new(StringArray::from(vec![
|
||||
"acme", "globex", "initech", "umbrella",
|
||||
])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
..Default::default()
|
||||
};
|
||||
let mut ds = Dataset::write(reader, uri, Some(params)).await.unwrap();
|
||||
ds.create_index_builder(&["repoName"], IndexType::BTree, &ScalarIndexParams::default())
|
||||
.replace(true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
async fn plan_str(ds: &Dataset, filter: datafusion::prelude::Expr) -> lance::Result<String> {
|
||||
let mut scanner = ds.scan();
|
||||
scanner.filter_expr(filter);
|
||||
let plan = scanner.create_plan().await?;
|
||||
Ok(format!("{}", displayable(plan.as_ref()).indent(true)))
|
||||
}
|
||||
|
||||
// The fix's shape: ident() preserves case → resolves `repoName` → index.
|
||||
let used = plan_str(&ds, ident("repoName").eq(lit("acme")))
|
||||
.await
|
||||
.expect("ident(\"repoName\") must plan against the case-preserved schema");
|
||||
assert!(
|
||||
used.contains("ScalarIndexQuery"),
|
||||
"camelCase @index equality must route to the scalar index (not full scan), got:\n{used}"
|
||||
);
|
||||
|
||||
// The pre-fix shape: col() normalizes `repoName` → `reponame`, which does not
|
||||
// exist in the case-sensitive schema, so planning fails. This is precisely
|
||||
// why `col()` could never reach the index and surfaced the #283 runtime error
|
||||
// — it could not silently full-scan past the index either.
|
||||
let err = plan_str(&ds, col("repoName").eq(lit("acme"))).await;
|
||||
assert!(
|
||||
err.is_err(),
|
||||
"col() lowercases repoName→reponame against a case-sensitive schema; \
|
||||
planning must fail rather than resolve, confirming ident() is required \
|
||||
for camelCase index routing. got plan:\n{err:?}"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue