mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-09 01:35:18 +02:00
* exec/query: pushdown IR filters via DataFusion Expr (Scanner::filter_expr) Switches `execute_node_scan` from string-flattened Lance SQL pushdown (`build_lance_filter` + `scanner.filter(&str)`) to structured DataFusion Expr pushdown (`build_lance_filter_expr` + `scanner.filter_expr(Expr)`). ## What this enables 1. **`CompOp::Contains` now pushes down.** `ir_filter_to_sql` returned `None` for list-contains (the comment said *"Can't pushdown list contains"*) because string SQL can't easily express it. With Expr, it lowers to DataFusion's `array_has(col, value)` builtin via the `nested_expressions` feature, and pushes down to Lance's scan layer the same way Eq/Lt/etc. do. Pinned by the new regression test `end_to_end::ir_filter_with_list_contains_pushes_down`. 2. **DataFusion 53's optimizer rules now reach our predicates.** Once the Expr lands at the Lance scanner, DF's planner runs: - `IN`-list vectorized eq kernel (DF #20528) - `PhysicalExprSimplifier` (DF #20111) - CASE WHEN x THEN y ELSE NULL shortcut (DF #20097) - Push limit into hash join (DF #20228) None of these were applicable before because the string SQL path short-circuited the optimizer. ## Scope This is one of three string-flattened pushdown sites; the other two (`hydrate_nodes`/Expand pushdown at query.rs:771-796 and the mutation delete path in `exec/mutation.rs::predicate_to_sql`) stay on the SQL string path for now: - The Expand pushdown still serializes through `hydrate_nodes`'s `extra_filter_sql: Option<&str>` parameter. Migrating it changes the `TableStorage` trait surface (`scan_stream(filter: Option<&str>)` → `Option<Expr>`) and the cascading call sites — out of scope for this MR. - The mutation delete predicate still goes through `Dataset::delete(&str)` in Lance 6.0.1. MR-A (delete two-phase via Lance #6658, gated on the Lance v7 bump per issue #112) will migrate that path to `DeleteBuilder::execute_uncommitted` taking an Expr. 
The existing `ir_filter_to_sql` / `ir_expr_to_sql` / `literal_to_sql` helpers stay in place to serve the remaining string-SQL consumers (mutation predicates). They get retired when the other call sites migrate. ## Cargo Enables the `nested_expressions` feature on the `datafusion` workspace dep. Lance already pulls in `datafusion-functions-nested` transitively (it's listed in their feature set), so this just exposes the `datafusion::functions_nested::expr_fn::array_has` re-export. No transitive dep change (Cargo.lock unchanged). ## Tests - New: `ir_filter_with_list_contains_pushes_down` — pins the case that was previously impossible (`ir_filter_to_sql` returning `None`). - 906/906 workspace tests still pass. - 417/417 engine integration tests pass (was 416 + the new one). - 19/19 failpoints (recovery canary). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * ci: pin rustfs/rustfs to 1.0.0-beta.3 (last known-good before creds-policy break) The RustFS S3 Integration job started failing 2026-05-23 with all 3 tests panicking on the first PUT: HTTP error: error sending request The "Dump RustFS logs on failure" step revealed the container was dying at startup: [FATAL] Server encountered an error and is shutting down: Default root credentials are not allowed on non-loopback listeners; set RUSTFS_ACCESS_KEY and RUSTFS_SECRET_KEY to non-default values, bind to loopback, or set RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true for local development only `rustfs/rustfs:latest` was updated 2026-05-21 (1.0.0-beta.4) with a credentials-policy check that rejects `rustfsadmin`/`rustfsadmin` as "default" values. PR #111 passed yesterday because it ran against beta.3; today's runs against beta.4 fail at container startup. This is unrelated to PR #113's Expr-pushdown refactor — the bump just happened to hit the same week. Pin to 1.0.0-beta.3 (2026-05-14, last tag before the change). 
The right long-term fix is one of: - Rotate the CI creds to less-default values (less coupling to RustFS's "default" set definition) - Set `RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true` per the error message - Use a workflow service container with controlled lifecycle Deferred — pinning is the minimal restore. Also incidentally documents *which* version we tested against, which `:latest` never did. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
85 lines
2.2 KiB
TOML
# Workspace root manifest: lists the member crates and selects the feature resolver.
[workspace]
resolver = "2"
members = [
    "crates/omnigraph-compiler",
    "crates/omnigraph",
    "crates/omnigraph-cli",
    "crates/omnigraph-policy",
    "crates/omnigraph-server",
]
# Packages Cargo operates on when no package is selected explicitly.
# omnigraph-compiler and omnigraph-policy are members but are not listed here.
default-members = [
    "crates/omnigraph",
    "crates/omnigraph-cli",
    "crates/omnigraph-server",
]

# Shared dependency versions for the workspace; member crates can inherit an
# entry with `<name>.workspace = true`.
[workspace.dependencies]
# Arrow crates, all on major version 58.
arrow-array = "58"
arrow-ipc = "58"
arrow-schema = "58"
arrow-select = "58"
arrow-cast = { version = "58", features = ["prettyprint"] }
arrow-ord = "58"

# DataFusion crates, all on major version 53. `datafusion` is built without
# default features; only `nested_expressions` is enabled.
datafusion = { version = "53", default-features = false, features = ["nested_expressions"] }
datafusion-physical-plan = "53"
datafusion-physical-expr = "53"
datafusion-execution = "53"
datafusion-common = "53"
datafusion-expr = "53"
datafusion-functions-aggregate = "53"

# Lance crates, all on 6.0.1. `lance` is built without default features; only
# `aws` is enabled.
lance = { version = "6.0.1", default-features = false, features = ["aws"] }
lance-datafusion = "6.0.1"
lance-file = "6.0.1"
lance-index = "6.0.1"
lance-linalg = "6.0.1"
lance-namespace = "6.0.1"
lance-namespace-impls = "6.0.1"
lance-table = "6.0.1"

# General-purpose dependencies.
ulid = "1"
futures = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
pest = "2"
pest_derive = "2"
thiserror = "2"
tokio = { version = "1", features = ["rt-multi-thread", "macros", "time", "net", "signal", "sync"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
tower = "0.5"
tower-http = { version = "0.6", features = ["trace"] }
color-eyre = "0.6"
tempfile = "3"
ahash = "0.8"
arc-swap = "1"
base64 = "0.22"
ariadne = "0.4"
regex = "1"
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
object_store = { version = "0.12.5", default-features = false, features = ["aws"] }
fail = "0.5"
time = { version = "0.3", features = ["formatting"] }
axum = { version = "0.8", features = ["json", "macros"] }
utoipa = { version = "5", features = ["axum_extras"] }
url = "2"
cedar-policy = "4.9"
sha2 = "0.10"
subtle = "2"

[profile.dev]
# Emit no debug info in dev builds.
debug = 0

# The "*" override applies to every dependency package, but not to the
# workspace's own member crates.
[profile.dev.package."*"]
opt-level = 2

[profile.release]
# Explicitly 2; Cargo's built-in release default is 3.
opt-level = 2
lto = "thin"
codegen-units = 16
# `true` strips symbols from the produced binary.
strip = true