From 57348cf7fa16fe30e9ebeb3b0d76f65ce69a5bfe Mon Sep 17 00:00:00 2001 From: Andrew Altshuler Date: Fri, 19 Jun 2026 18:42:56 +0300 Subject: [PATCH] fix(engine): preserve identifier case in filter pushdown (#283) (#285) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test(engine): regression tests for #283 camelCase property filters Red against current code. A query (or chained mutation) that filters on a camelCase schema field lints and plans cleanly but fails at run time with "No field named reponame" because the identifier's case is destroyed at the engine->Lance boundary. Coverage added: - query.rs unit: ir_filter_to_expr on a camelCase property must emit an Expr::Column named `repoName`, not `reponame` (red); plus a green coercion guard that a camelCase int column still gets a coerced literal. - mutation.rs unit: predicate_to_sql must emit the column UNQUOTED and case-preserved (green guard documenting the committed-scan contract). - literal_filters.rs e2e: a camelCase @index field with an inline-binding pushdown filter returns the seeded row (red — read pushdown). - writes.rs e2e: an update+delete on a camelCase predicate, and a chained update that re-reads the pending side of scan_with_pending by the same camelCase predicate (red — pending MemTable scan). Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01FQ1Hf4eXLsJmeLUkTYBEw7 * fix(engine): preserve identifier case in filter pushdown (#283) Two engine->Lance boundaries lowercased camelCase column identifiers, breaking any filter on a camelCase schema field even though the IR, compiler, projection, and in-memory filtering all preserve case. Read pushdown (exec/query.rs, ir_expr_to_expr): build the column reference with datafusion::prelude::ident() instead of col(). 
col() routes through SQL identifier normalization and lowercases an unquoted identifier (`repoName` -> `reponame`); ident() builds an unqualified, case-preserved Column. Property refs here are always bare column names, so there is no qualified-name handling to lose. No-op for the lowercase columns that work today. Pending mutation scan (table_store.rs, scan_pending_batches): the committed-scan consumer (Lance Scanner::filter(&str)) preserves an unquoted identifier's case but treats a double-quoted "col" as a string literal, so predicate_to_sql must keep the column unquoted. The pending side splices that same unquoted predicate into a DataFusion `SELECT ... WHERE`, which would lowercase it. Make that path case-preserving by disabling sql_parser.enable_ident_normalization on its SessionContext rather than quoting (quoting would match zero committed rows). predicate_to_sql gains only a clarifying comment; its emitted string is unchanged. Full engine suite green (579 tests). Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01FQ1Hf4eXLsJmeLUkTYBEw7 * docs(dev): case study for #283 camelCase filter bug Record the root cause, the two-boundary fix (read pushdown col→ident; pending mutation scan ident-normalization off), and why the obvious symmetric "quote the column" fix is wrong (Lance reads a double-quoted column as a string literal and silently matches zero committed rows). Linked from a new "Case Studies" section in the dev index so the link check passes. 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01FQ1Hf4eXLsJmeLUkTYBEw7 --------- Co-authored-by: Claude Opus 4.8 (1M context) --- crates/omnigraph/src/exec/mutation.rs | 32 ++++ crates/omnigraph/src/exec/query.rs | 65 ++++++- crates/omnigraph/src/table_store.rs | 10 +- crates/omnigraph/tests/literal_filters.rs | 26 +++ crates/omnigraph/tests/writes.rs | 67 +++++++ docs/dev/bug-case-fix.md | 217 ++++++++++++++++++++++ docs/dev/index.md | 10 + 7 files changed, 424 insertions(+), 3 deletions(-) create mode 100644 docs/dev/bug-case-fix.md diff --git a/crates/omnigraph/src/exec/mutation.rs b/crates/omnigraph/src/exec/mutation.rs index 9fcff45..fbd0751 100644 --- a/crates/omnigraph/src/exec/mutation.rs +++ b/crates/omnigraph/src/exec/mutation.rs @@ -477,6 +477,12 @@ fn predicate_to_sql( } }; + // #283: emit the column UNQUOTED. Lance's `Scanner::filter(&str)` (the + // committed-scan consumer) preserves an unquoted identifier's case but + // treats a double-quoted `"col"` as a string literal, so quoting here + // would silently match zero committed rows. The pending-batch MemTable + // query is instead made case-preserving by disabling DataFusion identifier + // normalization on its `SessionContext` (see `scan_pending_batches`). Ok(format!("{} {} {}", column, op, value_sql)) } @@ -1477,3 +1483,29 @@ fn enrich_mutation_params(params: &ParamMap) -> Result { } Ok(resolved) } + +#[cfg(test)] +mod predicate_sql_tests { + use super::*; + + // #283: a camelCase column in a mutation predicate must be emitted + // UNQUOTED and case-preserved. The committed-scan consumer, Lance's + // `Scanner::filter(&str)`, preserves an unquoted identifier's case but + // treats a double-quoted `"col"` as a string literal (which silently + // matches zero rows), so the predicate string must not quote the column. 
+ // The pending MemTable path stays case-preserving by disabling DataFusion + // identifier normalization on its context, not by quoting here. + #[test] + fn predicate_to_sql_preserves_camelcase_column_unquoted() { + let predicate = IRMutationPredicate { + property: "repoName".to_string(), + op: CompOp::Eq, + value: IRExpr::Literal(Literal::String("acme".into())), + }; + let sql = predicate_to_sql(&predicate, &ParamMap::new(), false).unwrap(); + assert_eq!( + sql, "repoName = 'acme'", + "column must be unquoted and case-preserved, got {sql}" + ); + } +} diff --git a/crates/omnigraph/src/exec/query.rs b/crates/omnigraph/src/exec/query.rs index e922075..23e1434 100644 --- a/crates/omnigraph/src/exec/query.rs +++ b/crates/omnigraph/src/exec/query.rs @@ -2149,9 +2149,13 @@ pub(super) fn ir_expr_to_expr( params: &ParamMap, target: Option<&arrow_schema::DataType>, ) -> Option<Expr> { - use datafusion::prelude::col; + use datafusion::prelude::ident; match expr { - IRExpr::PropAccess { property, .. } => Some(col(property)), + // #283: `ident()` preserves the identifier's case. `col()` would route + // through SQL identifier normalization and lowercase an unquoted + // camelCase column (`repoName` → `reponame`), which then fails to + // resolve against the case-sensitive Lance/Arrow schema. + IRExpr::PropAccess { property, .. } => Some(ident(property)), IRExpr::Literal(l) => literal_to_expr_coerced(l, target), IRExpr::Param(name) => params .get(name) @@ -2656,4 +2660,61 @@ mod literal_lowering_tests { "reversed-operand literal must coerce to the Int32 column type, got {expr:?}" ); } + + // Name of the left operand's column in a binary comparison `col OP lit`. + fn binary_left_column_name(e: &Expr) -> Option<String> { + match e { + Expr::BinaryExpr(b) => match b.left.as_ref() { + Expr::Column(c) => Some(c.name.clone()), + _ => None, + }, + _ => None, + } + } + + // #283: a camelCase property must reach the scan as its exact column name, + // not a SQL-normalized (lowercased) one. 
`col()` lowercases unquoted + // identifiers; the pushed-down column ref must stay `repoName`. + #[test] + fn ir_filter_preserves_camelcase_column_name() { + use arrow_schema::{DataType, Field}; + let schema = arrow_schema::Schema::new(vec![Field::new("repoName", DataType::Utf8, true)]); + let filter = IRFilter { + left: IRExpr::PropAccess { + variable: "d".into(), + property: "repoName".into(), + }, + op: CompOp::Eq, + right: IRExpr::Literal(Literal::String("acme".into())), + }; + let expr = ir_filter_to_expr(&filter, &ParamMap::new(), Some(&schema)).unwrap(); + assert_eq!( + binary_left_column_name(&expr).as_deref(), + Some("repoName"), + "camelCase column must be preserved (not lowercased to `reponame`), got {expr:?}" + ); + } + + // Index preservation: a camelCase numeric column still coerces its literal + // (so the scalar BTREE stays eligible) — the col→ident fix must not disturb + // the coercion path (which resolves the column type via field_with_name). + #[test] + fn ir_filter_coerces_literal_for_camelcase_int_column() { + use arrow_schema::{DataType, Field}; + let schema = + arrow_schema::Schema::new(vec![Field::new("itemCount", DataType::Int32, true)]); + let filter = IRFilter { + left: IRExpr::PropAccess { + variable: "m".into(), + property: "itemCount".into(), + }, + op: CompOp::Eq, + right: IRExpr::Literal(Literal::Integer(2)), + }; + let expr = ir_filter_to_expr(&filter, &ParamMap::new(), Some(&schema)).unwrap(); + assert!( + binary_has_int32_literal(&expr), + "camelCase int column must keep its coerced Int32 literal (BTREE-eligible), got {expr:?}" + ); + } } diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index 0325e1e..511508f 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -1883,7 +1883,15 @@ async fn scan_pending_batches( filter: Option<&str>, ) -> Result> { let schema = pending_schema.unwrap_or_else(|| pending_batches[0].schema()); - let ctx = 
datafusion::execution::context::SessionContext::new(); + // #283: disable SQL identifier normalization so an unquoted camelCase + // column in `filter` (e.g. `repoName = 'acme'`, emitted unquoted by + // `predicate_to_sql` because the committed Lance scan needs it unquoted) + // is matched case-preserving against the case-sensitive MemTable schema. + // Without this, DataFusion lowercases `repoName` → `reponame` and fails to + // resolve. Quoted identifiers (the projection list below) are unaffected. + let mut config = datafusion::execution::context::SessionConfig::new(); + config.options_mut().sql_parser.enable_ident_normalization = false; + let ctx = datafusion::execution::context::SessionContext::new_with_config(config); let mem = datafusion::datasource::MemTable::try_new(schema, vec![pending_batches.to_vec()]) .map_err(|e| OmniError::Lance(e.to_string()))?; ctx.register_table("pending", Arc::new(mem)) diff --git a/crates/omnigraph/tests/literal_filters.rs b/crates/omnigraph/tests/literal_filters.rs index d486f28..9fb480a 100644 --- a/crates/omnigraph/tests/literal_filters.rs +++ b/crates/omnigraph/tests/literal_filters.rs @@ -145,3 +145,29 @@ query seen_eq() { match { $m: Metric { seen: datetime("2024-06-01T12:00:00Z") } assert_eq!(sorted_metric_names(&mut db, q, "born_eq").await, vec!["m1"]); assert_eq!(sorted_metric_names(&mut db, q, "seen_eq").await, vec!["m1"]); } + +// #283: a property-match on a camelCase `@index` field must execute, not fail +// with "No field named reponame" at the Lance scan. Exercises the pushdown arm +// (inline binding `Doc { repoName: $r }`) end-to-end. 
+const CC_SCHEMA: &str = r#" +node Doc { + slug: String @key + repoName: String @index +} +"#; +const CC_DATA: &str = r#"{"type":"Doc","data":{"slug":"d1","repoName":"acme"}} +{"type":"Doc","data":{"slug":"d2","repoName":"globex"}}"#; + +#[tokio::test] +async fn camelcase_property_filter_executes() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let mut db = Omnigraph::init(uri, CC_SCHEMA).await.unwrap(); + load_jsonl(&mut db, CC_DATA, LoadMode::Overwrite).await.unwrap(); + + let q = r#"query by_repo($r: String) { match { $d: Doc { repoName: $r } } return { $d.slug } }"#; + let r = query_main(&mut db, q, "by_repo", &params(&[("$r", "acme")])) + .await + .expect("camelCase property filter must execute, not fail at the Lance scan"); + assert_eq!(r.num_rows(), 1, "expected exactly the d1 row for repoName=acme"); +} diff --git a/crates/omnigraph/tests/writes.rs b/crates/omnigraph/tests/writes.rs index 8120940..9cb8689 100644 --- a/crates/omnigraph/tests/writes.rs +++ b/crates/omnigraph/tests/writes.rs @@ -1646,3 +1646,70 @@ async fn branch_cascade_delete_forks_node_and_edges_under_held_queues() { "main must be untouched by the branch delete" ); } + +// #283: a mutation predicate (`where camelField = ...`) on a camelCase column +// must execute, not fail at the Lance scan with "No field named ...". Covers +// both `update` (committed scan via scan_with_pending) and `delete` +// (delete_where), which share the same emitted SQL filter string. +const CC_SCHEMA: &str = r#" +node Doc { + slug: String @key + repoName: String @index + status: String? 
+} +"#; +const CC_DATA: &str = r#"{"type":"Doc","data":{"slug":"d1","repoName":"acme","status":"open"}} +{"type":"Doc","data":{"slug":"d2","repoName":"globex","status":"open"}}"#; + +#[tokio::test] +async fn camelcase_mutation_predicate_updates_and_deletes() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let mut db = Omnigraph::init(uri, CC_SCHEMA).await.unwrap(); + load_jsonl(&mut db, CC_DATA, LoadMode::Overwrite).await.unwrap(); + + let m = r#" +query set_status($repo: String, $st: String) { update Doc set { status: $st } where repoName = $repo } +query del($repo: String) { delete Doc where repoName = $repo } +"#; + + let upd = db + .mutate("main", m, "set_status", &params(&[("$repo", "acme"), ("$st", "closed")])) + .await + .expect("update with a camelCase predicate must execute"); + assert_eq!(upd.affected_nodes, 1, "exactly the acme Doc should update"); + + let del = db + .mutate("main", m, "del", &params(&[("$repo", "globex")])) + .await + .expect("delete with a camelCase predicate must execute"); + assert_eq!(del.affected_nodes, 1, "exactly the globex Doc should delete"); + + assert_eq!(count_rows(&db, "node:Doc").await, 1, "one Doc (acme) should remain"); +} + +// #283 (pending side): a chained mutation whose 2nd op filters a camelCase +// column must read op-1's staged rows through the pending DataFusion `MemTable` +// (`SELECT … WHERE {filter}` via ctx.sql), which lowercases unquoted idents. +// This is the path the single update/delete above does NOT exercise. +#[tokio::test] +async fn camelcase_chained_mutation_reads_pending_by_camelcase() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let mut db = Omnigraph::init(uri, CC_SCHEMA).await.unwrap(); + load_jsonl(&mut db, CC_DATA, LoadMode::Overwrite).await.unwrap(); + + // op-1 stages a status change to the acme Doc; op-2 re-filters the same + // camelCase column, so it must match op-1's pending row. 
+ let m = r#" +query chain($repo: String) { + update Doc set { status: "stage1" } where repoName = $repo + update Doc set { status: "stage2" } where repoName = $repo +} +"#; + let r = db + .mutate("main", m, "chain", &params(&[("$repo", "acme")])) + .await + .expect("chained camelCase mutation must read the pending row, not fail at the MemTable SELECT"); + assert_eq!(r.affected_nodes, 2, "both ops should touch the acme Doc (read-your-writes)"); +} diff --git a/docs/dev/bug-case-fix.md b/docs/dev/bug-case-fix.md new file mode 100644 index 0000000..d5d596e --- /dev/null +++ b/docs/dev/bug-case-fix.md @@ -0,0 +1,217 @@ +# Bug case study: camelCase property filters lowercased at runtime + +**Issue:** [#283](https://github.com/ModernRelay/omnigraph/issues/283) (mirrored +in the dev-graph as `iss-990`) +**Reported on:** 0.7.0 (release binary) +**Status of code:** present on `v0.7.0`; fixed on branch `fix/iss-283-camelcase-filter` (read pushdown + pending mutation scan) +**Severity:** correctness — a valid, lint-clean query fails at run time. + +## Symptom + +A read query that filters on a **camelCase** schema field lints and plans +cleanly but fails when it executes: + +```text +No field named reponame. Column names are case sensitive. +``` + +Minimal repro: + +```pg +node SourceDocument { + repoName: String @index +} +``` + +```gq +query find($repoName: String) { + match { $d: SourceDocument { repoName: $repoName } } + return { $d.repoName } +} +``` + +`omnigraph lint` passes; running the query errors. The operator workaround is to +rename the field to all-lowercase (`repo`), which is why this looked like a +schema-design quirk rather than an engine bug. + +## Root cause + +The filter-pushdown path builds the Lance scan predicate's column reference with +`datafusion::prelude::col(property)`: + +- **Site:** `crates/omnigraph/src/exec/query.rs` — `ir_expr_to_expr`: + ```rust + IRExpr::PropAccess { property, .. 
} => Some(col(property)), + ``` +- `col(&str)` runs DataFusion's SQL **identifier normalization** + (`Column::from_qualified_name` → `parse_identifiers_normalized(.., false)`), + which **lowercases unquoted identifiers**. So `col("repoName")` resolves to a + column named `reponame`. +- Lance stores columns **case-preserved** (`repoName`) and resolves them + case-sensitively, so the scan can't find `reponame` and errors. + +The IR is not at fault: the parser and lowering preserve the original case +(`property: pm.prop_name.clone()`), which is exactly why the compiler resolves +`repoName` and **lint passes**. The case is destroyed only at the +engine → Lance boundary. + +There is a **second** boundary with the same root cause but a *different* +parser: the pending-batch scan in `table_store.rs::scan_pending_batches` splices +the mutation predicate string into a DataFusion `SELECT … WHERE {filter}` over a +`MemTable`, and DataFusion's SQL parser lowercases the unquoted column the same +way (`repoName` → `reponame`). See **Part 2** of the fix — it surfaces only on a +*chained* mutation that re-reads the pending side, which is why a single +update/delete on a camelCase predicate looked fine. + +### Why the rest of the engine is unaffected + +The two pushdown sites above were the offenders; the remaining paths already +treat column names case-sensitively and handle camelCase correctly: + +- **Projection / return** uses the real Arrow field name (`f.name()`). +- **In-memory filtering** (the fallback for non-pushable predicates) looks the + column up by the preserved property name against the batch schema. +- **The committed Lance mutation scan** (`Scanner::filter(&str)`) preserves an + unquoted identifier's case, so committed-row matching on a camelCase predicate + already worked. + +So the read bug surfaces for predicates that *are* pushed down (e.g. an equality +on a scalar camelCase column), and the mutation bug only for the pending-side +re-scan of a chained mutation. 
+ +### Why it slipped through + +The `ir_filter_to_expr` unit tests only use the all-lowercase field `count`, so +no test exercised a camelCase property. Nothing in CI compared the emitted +column name against the schema's casing. + +## Fix + +There are **two** engine→Lance boundaries that lose case, and they need +**different** fixes because the two consumers disagree on quoting semantics. + +### Part 1 — read pushdown (`exec/query.rs`, `ir_expr_to_expr`) + +Use DataFusion's case-preserving column constructor, `ident()`, instead of +`col()`: + +```rust +IRExpr::PropAccess { property, .. } => Some(datafusion::prelude::ident(property)), +``` + +`ident()` builds `Expr::Column(Column::new_unqualified(property))` with no SQL +parse and no normalization, so the case is preserved. Property references here +are always bare column names (the variable is dropped via `..`), so there is no +qualified-name (`a.b`) handling to lose. + +This is the right layer and the right shape: + +- It is a **no-op for the lowercase columns that work today** (`slug`, `id`, + `status`, …) — lowercasing those was already a no-op — so there is no + regression risk for the common case. +- It makes pushdown **consistent** with projection and in-memory filtering, + which already use case-preserved names. +- It also restores **index use** for camelCase columns: today such a filter + errors before the BTREE is even considered. + +### Part 2 — pending mutation scan (`table_store.rs`, `scan_pending_batches`) + +`update`/`delete` predicates lower through `predicate_to_sql(..)` into a single +**SQL string** (`format!("{} {} {}", column, op, value_sql)`). That one string +is consumed by **two** different parsers, and *they disagree on what quoting +means*: + +- The **committed** side passes the string to Lance's `Scanner::filter(&str)`. 
+ Lance **preserves an unquoted identifier's case** (so unquoted camelCase + *already works* on the committed scan) but treats a double-quoted `"col"` as a + **string literal** — `"repoName" = 'acme'` parses as `'repoName' = 'acme'`, + a constant-false predicate that silently matches **zero** committed rows. +- The **pending** side splices the same string into a DataFusion + `SELECT … FROM pending WHERE {filter}` over a `MemTable`. DataFusion's SQL + parser **lowercases** an unquoted identifier (`repoName` → `reponame`) and + fails to resolve against the case-sensitive `MemTable` schema. + +So no single quoting choice for the column satisfies both: quoting fixes the +pending side but breaks the committed side, and vice versa. The fix keeps the +predicate **unquoted** (what the committed Lance scan needs) and makes the +*pending* context case-preserving instead, by disabling SQL identifier +normalization on its `SessionContext`: + +```rust +let mut config = SessionConfig::new(); +config.options_mut().sql_parser.enable_ident_normalization = false; +let ctx = SessionContext::new_with_config(config); +``` + +`predicate_to_sql` itself never lowercased anything (it copies the preserved +property name), so its emitted string is unchanged — it gains only a comment +recording the unquoted contract. The projection list in the same function is +already double-quoted and is unaffected (quoted identifiers are case-preserved +under either normalization setting). + +Rejected alternatives: banning/normalizing camelCase at the compiler (a real +usability regression — camelCase fields are legitimate), lowercasing column +names in storage (a breaking on-disk change), merely making lint *warn* (a +band-aid that leaves the runtime broken), or **quoting the column in +`predicate_to_sql`** (empirically breaks 7 existing lowercase-column mutation +tests because Lance reads `"col"` as a string literal — see Part 2). 
+ +## Scope and caveats + +- **Not Windows-specific.** The original report's environment was Windows, but + the cause is platform-independent. +- **The mutation path was only *partially* broken, and not where first + assumed.** The committed side of `scan_with_pending(..)` (Lance + `Scanner::filter(&str)`) and `delete`'s `delete_where(..)` / `Dataset::delete` + preserve an unquoted identifier's case, so a *single* `update`/`delete` on a + camelCase predicate already worked. Only the **pending** side — the in-memory + `MemTable` re-scan that a *chained* mutation hits — lowercased the column. + This was confirmed empirically: a single update+delete on `repoName` passes + unfixed; a chained update that re-reads the pending side fails with + `No field named reponame`. The fix is Part 2 above (disable identifier + normalization on the pending `SessionContext`), **not** quoting the column. + The eventual MR-A migration (`delete_where` → Lance 7 + `DeleteBuilder::execute_uncommitted`, structured `Expr`) is the longer-term + shape but is out of scope here. +- **Check the coercion lookup.** Adjacent to the fix, the literal-coercion step + (`prop_data_type(.., schema)`, which keeps the BTREE usable) also resolves the + column by name. Confirm it uses the preserved name; if it mishandles case a + camelCase filter would resolve but lose its index — a silent perf regression, + not a crash. +- **Do not use `col(r#""repoName""#)` as the general read-path fix.** Quoting + would preserve this one name, but it routes through SQL identifier parsing and + changes qualified-name semantics. The IR property here is already a bare + column name, so `ident(property)` / `Column::new_unqualified(property)` is the + precise structured expression. 
+- **Do not "fix" the mutation string by quoting the column.** It is tempting to + reuse a `quote_ident` helper symmetric with `literal_to_sql`'s value escaping, + but the column quote-rules differ between the two consumers of the predicate + string: Lance's `Scanner::filter(&str)` reads `"col"` as a *string literal* + (silently matching nothing), while DataFusion's `ctx.sql` reads it as a + case-preserved identifier. Because the committed Lance scan already preserves + the *unquoted* identifier's case, the column must stay unquoted and the + pending DataFusion context must be told not to normalize — not the reverse. + +## Validation (test-first) + +1. **Red:** add an `ir_filter_to_expr` test asserting the emitted + `Expr::Column` name for a camelCase property is `repoName`, not `reponame`. + Fails on current code. +2. **Green:** apply the `col` → `ident` change (Part 1) and the pending-context + `enable_ident_normalization = false` change (Part 2). +3. **End-to-end:** a camelCase `@index` field with + `match { $t: T { camelField: $x } }` returns the row (the unit test alone can't + catch an engine↔Lance boundary regression). +4. **Mutation parity:** with the same camelCase field, cover: + - `update T set { otherField: ... } where camelField = $x` updates the intended + row. + - `delete T where camelField = $x` deletes the intended row and cascades as + expected. + - A chained update that hits the pending side of `scan_with_pending` still + works, so both the committed Lance scan and pending DataFusion `MemTable` + predicate paths are case-preserving. +5. **Index preservation:** keep or add a plan/trace assertion that the + camelCase `@index` equality predicate still reaches the scalar-index path. + A result-only test can pass while silently falling back to a full scan. +6. Run the full engine suite (`cargo test -p omnigraph-engine`) — in particular + the existing BTREE index-eligibility tests, which `ident()` must not disturb. 
diff --git a/docs/dev/index.md b/docs/dev/index.md index 1fc0b77..91f108b 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -62,6 +62,16 @@ The `docs/rfcs/` track is the **public, externally-authorable** RFC process. The maintainer/internal RFCs below (`rfc-00N-*.md`) are a separate, team-owned track; don't conflate the two. +## Case Studies + +Worked write-ups of specific bugs — root cause, fix, and the reasoning that +ruled out the tempting-but-wrong alternatives. Read these for the debugging +pattern, not just the outcome. + +| Area | Read | +|---|---| +| camelCase property filters lowercased at runtime (#283) — two engine→Lance boundaries, two different fixes | [bug-case-fix.md](bug-case-fix.md) | + ## Active Implementation Plans Working documents for in-flight feature work. Removed when the work lands.