mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-15 01:55:13 +02:00
fix(engine): lower date/datetime filter literals as typed Arrow scalars
`literal_to_expr` previously lowered `Date`/`DateTime` query literals to Utf8 strings and relied on DataFusion's implicit casts. Against a physical `Date32`/`Date64` column, that coercion can be applied to the column side (`CAST(col AS Utf8)`), which defeats the scalar BTREE index and degrades the scan to a full filtered read. This change lowers them to typed `Date32`/`Date64` scalars instead (reusing the loader's `parse_date32_literal`/`parse_date64_literal`, which the in-memory comparison arm already uses), so the predicate stays a direct column comparison and the index is used. Malformed literals fall back to the Utf8 string, so pushdown behavior never regresses. Tests: unit golden tests assert that the lowered literal is typed (red before this change, green after), and inline-binding pushdown equality queries in `literal_filters` confirm that the epoch conversion selects the right rows.
This commit is contained in:
parent
e4334deb14
commit
e4ef67b0bb
2 changed files with 63 additions and 5 deletions
|
|
@@ -2073,11 +2073,24 @@ fn literal_to_expr(lit: &Literal) -> Option<datafusion::prelude::Expr> {
|
|||
Literal::Integer(n) => df_lit(*n),
|
||||
Literal::Float(f) => df_lit(*f),
|
||||
Literal::Bool(b) => df_lit(*b),
|
||||
// Date/DateTime stored as strings; pass through as string literals
|
||||
// — Lance/DataFusion handles the comparison against typed columns
|
||||
// via implicit cast, matching the existing string-SQL behavior.
|
||||
Literal::Date(s) => df_lit(s.clone()),
|
||||
Literal::DateTime(s) => df_lit(s.clone()),
|
||||
// Date/DateTime columns are physically Date32/Date64 (see the loader's
|
||||
// `to_arrow`). Lower the literal to the matching TYPED Arrow scalar so
|
||||
// the predicate stays a direct column comparison and the persisted
|
||||
// BTREE is used. A Utf8 literal would force DataFusion to coerce one
|
||||
// side; if it casts the column (`CAST(col AS Utf8)`) the scalar index
|
||||
// is defeated and the scan degrades to a full filtered read. This
|
||||
// matches the already-typed in-memory comparison arm in
|
||||
// `projection.rs::literal_to_array`. On a malformed literal, fall back
|
||||
// to the Utf8 string so pushdown behavior never regresses (the
|
||||
// in-memory path surfaces the parse error if it is load-bearing).
|
||||
Literal::Date(s) => match crate::loader::parse_date32_literal(s) {
|
||||
Ok(days) => df_lit(datafusion::scalar::ScalarValue::Date32(Some(days))),
|
||||
Err(_) => df_lit(s.clone()),
|
||||
},
|
||||
Literal::DateTime(s) => match crate::loader::parse_date64_literal(s) {
|
||||
Ok(ms) => df_lit(datafusion::scalar::ScalarValue::Date64(Some(ms))),
|
||||
Err(_) => df_lit(s.clone()),
|
||||
},
|
||||
Literal::List(_) => return None,
|
||||
})
|
||||
}
|
||||
|
|
@@ -2285,3 +2298,42 @@ mod expand_chooser_tests {
|
|||
assert_eq!(choose_expand_mode(&i), ExpandMode::Csr);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod literal_lowering_tests {
|
||||
use super::*;
|
||||
use datafusion::prelude::Expr;
|
||||
use datafusion::scalar::ScalarValue;
|
||||
|
||||
// Date/DateTime filter literals must lower to TYPED Arrow scalars
|
||||
// (Date32 / Date64), not Utf8 strings. A Utf8 literal against a typed
|
||||
// Date column forces DataFusion to coerce one side; if it casts the
|
||||
// column (`CAST(col AS Utf8)`) the persisted BTREE is defeated and the
|
||||
// scan falls back to a full filtered read. A typed literal keeps the
|
||||
// predicate a direct column comparison so the scalar index is used.
|
||||
#[test]
|
||||
fn date_literals_lower_to_typed_arrow_scalars() {
|
||||
let dt = literal_to_expr(&Literal::DateTime("2024-06-01T12:00:00Z".into())).unwrap();
|
||||
assert!(
|
||||
matches!(dt, Expr::Literal(ScalarValue::Date64(Some(_)), ..)),
|
||||
"DateTime literal must lower to a typed Date64 scalar, got {dt:?}"
|
||||
);
|
||||
let d = literal_to_expr(&Literal::Date("2024-06-01".into())).unwrap();
|
||||
assert!(
|
||||
matches!(d, Expr::Literal(ScalarValue::Date32(Some(_)), ..)),
|
||||
"Date literal must lower to a typed Date32 scalar, got {d:?}"
|
||||
);
|
||||
}
|
||||
|
||||
// A malformed date string must not panic or error in the (infallible)
|
||||
// lowering: it falls back to the Utf8 literal so pushdown behavior never
|
||||
// regresses (the in-memory path surfaces the parse error if it matters).
|
||||
#[test]
|
||||
fn malformed_date_literal_falls_back_to_string() {
|
||||
let bad = literal_to_expr(&Literal::DateTime("not-a-date".into())).unwrap();
|
||||
assert!(
|
||||
matches!(bad, Expr::Literal(ScalarValue::Utf8(Some(_)), ..)),
|
||||
"malformed DateTime literal should fall back to a Utf8 literal, got {bad:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -88,9 +88,15 @@ async fn date_and_datetime_literal_filters_execute() {
|
|||
let q = r#"
|
||||
query born_ge() { match { $m: Metric $m.born >= date("2024-01-01") } return { $m.name } }
|
||||
query seen_lt() { match { $m: Metric $m.seen < datetime("2024-01-01T00:00:00Z") } return { $m.name } }
|
||||
query born_eq() { match { $m: Metric { born: date("2024-06-01") } } return { $m.name } }
|
||||
query seen_eq() { match { $m: Metric { seen: datetime("2024-06-01T12:00:00Z") } } return { $m.name } }
|
||||
"#;
|
||||
// born: m1 2024-06, m3 2025 >= 2024-01-01
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "born_ge").await, vec!["m1", "m3"]);
|
||||
// seen: m2 2023, m4 2022 < 2024-01-01
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "seen_lt").await, vec!["m2", "m4"]);
|
||||
// Inline-binding equality exercises the Lance-pushdown arm with a typed
|
||||
// Date32/Date64 literal: the epoch conversion must select exactly m1.
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "born_eq").await, vec!["m1"]);
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "seen_eq").await, vec!["m1"]);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue