diff --git a/AGENTS.md b/AGENTS.md index d1c1bde..6e45aa7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,7 +18,7 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th **Version surveyed:** 0.4.2 **Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-cli`, `omnigraph-server` -**Storage substrate:** Lance 4.x (columnar, versioned, branchable) +**Storage substrate:** Lance 6.x (columnar, versioned, branchable) **License:** MIT **Toolchain:** Rust stable, edition 2024 @@ -53,7 +53,7 @@ CLI (omnigraph) HTTP Server (omnigraph-server, Axum) omnigraph (engine) ── ManifestRepo, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec │ ▼ - Lance 4.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes + Lance 6.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes │ ▼ Object store (file / s3 / RustFS / MinIO / S3-compat) diff --git a/Cargo.lock b/Cargo.lock index 0e3aac4..fcc2d7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,9 +175,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", @@ -210,9 +210,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" dependencies = [ "ahash", "arrow-buffer", @@ -221,7 +221,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "num-complex", "num-integer", "num-traits", @@ -229,9 +229,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" dependencies = [ "bytes", "half", @@ -241,9 +241,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", @@ -263,9 +263,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de" dependencies = [ "arrow-array", "arrow-cast", @@ -278,9 +278,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" dependencies = [ "arrow-array", "arrow-buffer", @@ -301,21 +301,22 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex 0.12.1", + "lz4_flex", "zstd", ] [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", - "arrow-data", + "arrow-ord", "arrow-schema", + "arrow-select", "chrono", "half", "indexmap 2.13.0", @@ -331,9 +332,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -344,9 +345,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -357,9 +358,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" dependencies = [ "bitflags", "serde_core", @@ -368,9 
+369,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash", "arrow-array", @@ -382,9 +383,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -464,7 +465,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -475,7 +476,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -958,7 +959,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1105,31 +1106,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bon" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" -dependencies = [ - "bon-macros", - "rustversion", -] - -[[package]] -name = "bon-macros" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" -dependencies = [ - "darling", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.115", -] - [[package]] name = "borsh" version = "1.6.1" @@ 
-1290,12 +1266,6 @@ dependencies = [ "smol_str", ] -[[package]] -name = "census" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" - [[package]] name = "cfg-if" version = "1.0.4" @@ -1310,9 +1280,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -1373,7 +1343,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1645,7 +1615,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1656,7 +1626,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1675,9 +1645,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" +checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" dependencies = [ "arrow", "arrow-schema", @@ -1711,7 +1681,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.2", "regex", @@ -1724,9 +1694,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" +checksum = 
"37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" dependencies = [ "arrow", "async-trait", @@ -1742,16 +1712,16 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" +checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" dependencies = [ "arrow", "async-trait", @@ -1767,14 +1737,14 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", ] [[package]] name = "datafusion-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" +checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" dependencies = [ "ahash", "arrow", @@ -1783,9 +1753,10 @@ dependencies = [ "half", "hashbrown 0.16.1", "indexmap 2.13.0", + "itertools 0.14.0", "libc", "log", - "object_store", + "object_store 0.13.2", "paste", "sqlparser", "tokio", @@ -1794,9 +1765,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" +checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" dependencies = [ "futures", "log", @@ -1805,9 +1776,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" +checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" 
dependencies = [ "arrow", "async-trait", @@ -1826,7 +1797,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "rand 0.9.2", "tokio", "url", @@ -1834,9 +1805,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" +checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" dependencies = [ "arrow", "arrow-ipc", @@ -1852,15 +1823,15 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" +checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" dependencies = [ "arrow", "async-trait", @@ -1874,16 +1845,16 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", "regex", "tokio", ] [[package]] name = "datafusion-datasource-json" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" +checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" dependencies = [ "arrow", "async-trait", @@ -1897,31 +1868,35 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-doc" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" +checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" [[package]] name = "datafusion-execution" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" +checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.2", "tempfile", @@ -1930,9 +1905,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" +checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" dependencies = [ "arrow", "async-trait", @@ -1952,9 +1927,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" +checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", @@ -1965,9 +1940,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" +checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" dependencies = [ "arrow", "arrow-buffer", @@ -1986,6 +1961,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ 
-1996,9 +1972,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" +checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" dependencies = [ "ahash", "arrow", @@ -2012,14 +1988,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" +checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" dependencies = [ "ahash", "arrow", @@ -2030,9 +2007,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" +checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" dependencies = [ "arrow", "arrow-ord", @@ -2046,16 +2023,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" +checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" dependencies = [ "arrow", "async-trait", @@ -2069,9 +2048,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.4.0" +version = "53.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" +checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" dependencies = [ "arrow", "datafusion-common", @@ -2087,9 +2066,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" +checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2097,20 +2076,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" +checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" +checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" dependencies = [ "arrow", "chrono", @@ -2127,9 +2106,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" +checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" dependencies = [ "ahash", "arrow", @@ -2150,9 +2129,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.4.0" +version = "53.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" +checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" dependencies = [ "arrow", "datafusion-common", @@ -2165,9 +2144,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" +checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" dependencies = [ "ahash", "arrow", @@ -2182,9 +2161,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" +checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" dependencies = [ "arrow", "datafusion-common", @@ -2200,9 +2179,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" +checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" dependencies = [ "ahash", "arrow", @@ -2224,6 +2203,7 @@ dependencies = [ "indexmap 2.13.0", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2231,9 +2211,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" +checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" dependencies = [ "arrow", "datafusion-common", @@ -2248,9 +2228,9 @@ 
dependencies = [ [[package]] name = "datafusion-session" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" +checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" dependencies = [ "async-trait", "datafusion-common", @@ -2262,15 +2242,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" +checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", "log", "regex", @@ -2365,7 +2346,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2377,12 +2358,6 @@ dependencies = [ "const-random", ] -[[package]] -name = "downcast-rs" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" - [[package]] name = "dunce" version = "1.0.5" @@ -2404,7 +2379,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2448,7 +2423,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2521,12 +2496,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" -[[package]] -name = "fastdivide" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" - [[package]] name = "fastrand" version = "2.3.0" @@ -2601,16 +2570,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs4" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" -dependencies = [ - "rustix 0.38.44", - "windows-sys 0.52.0", -] - [[package]] name = "fs_extra" version = "1.3.0" @@ -2619,9 +2578,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2195cc7f87e84bd695586137de99605e7e9579b26ec5e01b82960ddb4d0922f2" +checksum = "83cf860f6a6bf0a6a60fdfe5a36c75121fad5ea4332d1d12deee3e65b6047727" dependencies = [ "arrow-array", "rand 0.9.2", @@ -2698,7 +2657,7 @@ checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2887,8 +2846,6 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", "foldhash 0.1.5", ] @@ -2903,6 +2860,12 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" @@ -2939,12 +2902,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "htmlescape" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" - [[package]] name = "http" version = "0.2.12" @@ -3307,6 +3264,17 @@ 
dependencies = [ "generic-array", ] +[[package]] +name = "io-uring" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d09b98f7eace8982db770e4408e7470b028ce513ac28fecdc6bf4c30fe92b62" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -3376,7 +3344,7 @@ checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -3425,7 +3393,7 @@ dependencies = [ "fast-float2", "itoa", "jiff", - "nom 8.0.0", + "nom", "num-traits", "ordered-float", "rand 0.9.2", @@ -3492,14 +3460,15 @@ dependencies = [ [[package]] name = "lance" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efe6c3ddd79cdfd2b7e1c23cafae52806906bc40fbd97de9e8cf2f8c7a75fc04" +checksum = "d34e854994e84d043897f5ec9fb609221e9e69e3fd52996cd715d979fcd349f6" dependencies = [ "arrow", "arrow-arith", "arrow-array", "arrow-buffer", + "arrow-cast", "arrow-ipc", "arrow-ord", "arrow-row", @@ -3535,12 +3504,14 @@ dependencies = [ "lance-linalg", "lance-namespace", "lance-table", + "lance-tokenizer", "log", "moka", - "object_store", + "object_store 0.12.5", "permutation", "pin-project", "prost", + "prost-build", "prost-types", "rand 0.9.2", "roaring", @@ -3548,7 +3519,6 @@ dependencies = [ "serde", "serde_json", "snafu", - "tantivy", "tokio", "tokio-stream", "tokio-util", @@ -3559,14 +3529,15 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d9f5d95bdda2a2b790f1fb8028b5b6dcf661abeb3133a8bca0f3d24b054af87" +checksum = "7827fe404358c27d120ee8ea8ef7b9415c2911d54072bec83dd689d750ae65da" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "arrow-select", @@ -3581,9 
+3552,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f827d6ab9f8f337a9509d5ad66a12f3314db8713868260521c344ef6135eb4e4" +checksum = "2cd0b31570d50fe13c7e4e36b03e1f1c99c3d8e5a34845b24b0665b51b40570d" dependencies = [ "arrayref", "paste", @@ -3592,9 +3563,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1e25df6a79bf72ee6bcde0851f19b1cd36c5848c1b7db83340882d3c9fdecb" +checksum = "b128c213c676cb8e03c62a68670642770825171e64097cc2da97cbb19fe35d29" dependencies = [ "arrow-array", "arrow-buffer", @@ -3614,7 +3585,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.12.5", "pin-project", "prost", "rand 0.9.2", @@ -3631,13 +3602,14 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93146de8ae720cb90edef81c2f2d0a1b065fc2f23ecff2419546f389b0fa70a4" +checksum = "e03b2de71cbcd09b10bf1a17c83cacbc0176ecd97203fb72b9e59d9b8f9a3743" dependencies = [ "arrow", "arrow-array", "arrow-buffer", + "arrow-cast", "arrow-ord", "arrow-schema", "arrow-select", @@ -3663,9 +3635,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccec8ce4d8e0a87a99c431dab2364398029f2ffb649c1a693c60c79e05ed30dd" +checksum = "2fe7c7ea7fd397e495a1646fec360e46ee0cbd75718f1c0e887aad657c5f2944" dependencies = [ "arrow", "arrow-array", @@ -3676,16 +3648,16 @@ dependencies = [ "half", "hex", "rand 0.9.2", - "rand_distr 0.5.1", + "rand_distr", "rand_xoshiro", "random_word", ] [[package]] name = "lance-encoding" -version = "4.0.0" +version = "6.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1aec0bbbac6bce829bc10f1ba066258126100596c375fb71908ecf11c2c2a5" +checksum = "fe3f8070835b407d8db9ea8728386bc3207ba23c66a9c22d344e231ef12b77ca" dependencies = [ "arrow-arith", "arrow-array", @@ -3722,9 +3694,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14a8c548804f5b17486dc2d3282356ed1957095a852780283bc401fdd69e9075" +checksum = "a6dfcf654549330df3aef708cd7c12e170feecddd34d6c19dd005b4153213268" dependencies = [ "arrow-arith", "arrow-array", @@ -3745,7 +3717,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -3756,9 +3728,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da212f0090ea59f79ac3686660f596520c167fe1cb5f408900cf71d215f0e03" +checksum = "4fb8ad0bd10efa2608634a2518b7dd501231e76c56a65fbd6519e23914cc425a" dependencies = [ "arrow", "arrow-arith", @@ -3795,16 +3767,17 @@ dependencies = [ "lance-io", "lance-linalg", "lance-table", + "lance-tokenizer", "libm", "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", "rand 0.9.2", - "rand_distr 0.5.1", + "rand_distr", "rangemap", "rayon", "roaring", @@ -3812,7 +3785,6 @@ dependencies = [ "serde_json", "smallvec", "snafu", - "tantivy", "tempfile", "tokio", "tracing", @@ -3822,9 +3794,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d958eb4b56f03bbe0f5f85eb2b4e9657882812297b6f711f201ffc995f259f" +checksum = "ef5314703fa8c8baed04193cc669da80ab42521c6319d3cc921a4a997690dcc0" dependencies = [ "arrow", "arrow-arith", @@ -3844,11 +3816,14 @@ 
dependencies = [ "deepsize", "futures", "http 1.4.0", + "io-uring", "lance-arrow", "lance-core", "lance-namespace", + "libc", "log", - "object_store", + "moka", + "object_store 0.12.5", "object_store_opendal", "opendal", "path_abs", @@ -3865,9 +3840,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0285b70da35def7ed95e150fae1d5308089554e1290470403ed3c50cb235bc5e" +checksum = "51aa9b73279f505b2bec0f194c7a2390ca74ad3260131e631a7bef8d97d54b2e" dependencies = [ "arrow-array", "arrow-buffer", @@ -3883,9 +3858,9 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f78e2a828b654e062a495462c6e3eb4fcf0e7e907d761b8f217fc09ccd3ceac" +checksum = "39cd01581f55ce45c49cbe494ee86c7ba7ca4ca3654690fd820941cd9105a46e" dependencies = [ "arrow", "async-trait", @@ -3898,9 +3873,9 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2392314f3da38f00d166295e44244208a65ccfc256e274fa8631849fc3f4d94" +checksum = "c2cb89f3933060f01350ad05a5a3fbda952e8ba638799bf8ac4cd2368416ee46" dependencies = [ "arrow", "arrow-ipc", @@ -3913,10 +3888,11 @@ dependencies = [ "lance-core", "lance-index", "lance-io", + "lance-linalg", "lance-namespace", "lance-table", "log", - "object_store", + "object_store 0.12.5", "rand 0.9.2", "serde_json", "snafu", @@ -3926,22 +3902,23 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.6.1" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" +checksum = "6369eee4682fb11edf538388b43c61ce288b8302fe89bb40944d7daa7faaae99" dependencies = [ "reqwest", "serde", 
"serde_json", "serde_repr", + "serde_with", "url", ] [[package]] name = "lance-table" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df9c4adca3eb2074b3850432a9fb34248a3d90c3d6427d158b13ff9355664ee" +checksum = "5db70650465a1af174b7dfe6948ec91a3d466ada12e11274eb66e51132173aa0" dependencies = [ "arrow", "arrow-array", @@ -3959,7 +3936,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -3976,6 +3953,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "lance-tokenizer" +version = "6.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb08ef9382c9d58036c323db2c19cc097e02d1d0d87714fc7176b5d3b36a31aa" +dependencies = [ + "rust-stemmers", + "serde", + "unicode-normalization", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -3991,12 +3979,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" -[[package]] -name = "levenshtein_automata" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" - [[package]] name = "lexical-core" version = "1.0.6" @@ -4093,12 +4075,6 @@ dependencies = [ "linked-hash-map", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -4146,7 +4122,7 @@ dependencies = [ "quote", "regex-automata", "regex-syntax", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -4171,15 +4147,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "lru" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" -dependencies = [ - "hashbrown 0.15.5", -] - [[package]] name = "lru-slab" version = "0.1.2" @@ -4207,15 +4174,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.6" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" - -[[package]] -name = "lz4_flex" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "7ef0d4ed8669f8f8826eb00dc878084aa8f253506c4fd5e8f58f5bce72ddb97e" dependencies = [ "twox-hash", ] @@ -4258,30 +4219,12 @@ dependencies = [ "digest", ] -[[package]] -name = "measure_time" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" -dependencies = [ - "log", -] - [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" -[[package]] -name = "memmap2" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" -dependencies = [ - "libc", -] - [[package]] name = "miette" version = "7.6.0" @@ -4302,7 +4245,7 @@ checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -4321,12 +4264,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -4380,12 
+4317,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "murmurhash32" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" - [[package]] name = "ndarray" version = "0.16.1" @@ -4407,16 +4338,6 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nom" version = "8.0.0" @@ -4578,6 +4499,32 @@ dependencies = [ "web-time", ] +[[package]] +name = "object_store" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "object_store_opendal" version = "0.55.0" @@ -4588,7 +4535,7 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store", + "object_store 0.12.5", "opendal", "pin-project", "tokio", @@ -4601,6 +4548,7 @@ dependencies = [ "assert_cmd", "clap", "color-eyre", + "lance", "lance-index", "omnigraph-compiler", "omnigraph-engine", @@ -4660,7 +4608,7 @@ dependencies = [ "lance-namespace", "lance-namespace-impls", "lance-table", - "object_store", + "object_store 0.12.5", "omnigraph-compiler", "omnigraph-policy", 
"regex", @@ -4702,6 +4650,7 @@ dependencies = [ "color-eyre", "dashmap", "futures", + "lance", "lance-index", "omnigraph-compiler", "omnigraph-engine", @@ -4733,12 +4682,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" -[[package]] -name = "oneshot" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" - [[package]] name = "opendal" version = "0.55.0" @@ -4806,15 +4749,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "ownedbytes" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" -dependencies = [ - "stable_deref_trait", -] - [[package]] name = "owo-colors" version = "4.2.3" @@ -4939,7 +4873,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5024,7 +4958,7 @@ checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5176,7 +5110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5213,7 +5147,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.115", + "syn 2.0.117", "tempfile", ] @@ -5227,7 +5161,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5410,16 +5344,6 @@ dependencies = [ "getrandom 0.3.4", ] -[[package]] -name = "rand_distr" -version = "0.4.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand 0.8.5", -] - [[package]] name = "rand_distr" version = "0.5.1" @@ -5521,7 +5445,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5730,19 +5654,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.1.4" @@ -5752,7 +5663,7 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.12.1", + "linux-raw-sys", "windows-sys 0.61.2", ] @@ -6009,7 +5920,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6045,7 +5956,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6088,7 +5999,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6127,7 +6038,7 @@ checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6227,15 +6138,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" -[[package]] -name = "sketches-ddsketch" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" -dependencies = [ - "serde", -] - [[package]] name = "slab" version = "0.4.12" @@ -6276,7 +6178,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6317,9 +6219,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -6327,13 +6229,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6404,7 +6306,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6426,9 +6328,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.115" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -6452,7 +6354,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6461,152 +6363,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" -[[package]] -name = "tantivy" -version = "0.24.2" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" -dependencies = [ - "aho-corasick", - "arc-swap", - "base64", - "bitpacking", - "bon", - "byteorder", - "census", - "crc32fast", - "crossbeam-channel", - "downcast-rs", - "fastdivide", - "fnv", - "fs4", - "htmlescape", - "hyperloglogplus", - "itertools 0.14.0", - "levenshtein_automata", - "log", - "lru", - "lz4_flex 0.11.6", - "measure_time", - "memmap2", - "once_cell", - "oneshot", - "rayon", - "regex", - "rust-stemmers", - "rustc-hash", - "serde", - "serde_json", - "sketches-ddsketch", - "smallvec", - "tantivy-bitpacker", - "tantivy-columnar", - "tantivy-common", - "tantivy-fst", - "tantivy-query-grammar", - "tantivy-stacker", - "tantivy-tokenizer-api", - "tempfile", - "thiserror", - "time", - "uuid", - "winapi", -] - -[[package]] -name = "tantivy-bitpacker" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" -dependencies = [ - "bitpacking", -] - -[[package]] -name = "tantivy-columnar" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" -dependencies = [ - "downcast-rs", - "fastdivide", - "itertools 0.14.0", - "serde", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-sstable", - "tantivy-stacker", -] - -[[package]] -name = "tantivy-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" -dependencies = [ - "async-trait", - "byteorder", - "ownedbytes", - "serde", - "time", -] - -[[package]] -name = "tantivy-fst" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" -dependencies = [ 
- "byteorder", - "regex-syntax", - "utf8-ranges", -] - -[[package]] -name = "tantivy-query-grammar" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" -dependencies = [ - "nom 7.1.3", - "serde", - "serde_json", -] - -[[package]] -name = "tantivy-sstable" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" -dependencies = [ - "futures-util", - "itertools 0.14.0", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-fst", - "zstd", -] - -[[package]] -name = "tantivy-stacker" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" -dependencies = [ - "murmurhash32", - "rand_distr 0.4.3", - "tantivy-common", -] - -[[package]] -name = "tantivy-tokenizer-api" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" -dependencies = [ - "serde", -] - [[package]] name = "tap" version = "1.0.1" @@ -6622,7 +6378,7 @@ dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", - "rustix 1.1.4", + "rustix", "windows-sys 0.61.2", ] @@ -6658,7 +6414,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6769,7 +6525,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6801,6 +6557,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -6888,7 +6645,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7113,7 +6870,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7244,7 +7001,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -7333,22 +7090,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.11" @@ -7358,12 +7099,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.62.2" @@ -7385,7 +7120,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7396,7 +7131,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7618,7 +7353,7 @@ dependencies = [ "heck", "indexmap 2.13.0", "prettyplease", - "syn 2.0.115", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -7634,7 +7369,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -7722,7 +7457,7 @@ checksum = 
"b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "synstructure", ] @@ -7743,7 +7478,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7763,7 +7498,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "synstructure", ] @@ -7803,7 +7538,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c3141d2..1e647d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,29 +14,29 @@ default-members = [ ] [workspace.dependencies] -arrow-array = "57" -arrow-ipc = "57" -arrow-schema = "57" -arrow-select = "57" -arrow-cast = { version = "57", features = ["prettyprint"] } -arrow-ord = "57" +arrow-array = "58" +arrow-ipc = "58" +arrow-schema = "58" +arrow-select = "58" +arrow-cast = { version = "58", features = ["prettyprint"] } +arrow-ord = "58" -datafusion = { version = "52", default-features = false } -datafusion-physical-plan = "52" -datafusion-physical-expr = "52" -datafusion-execution = "52" -datafusion-common = "52" -datafusion-expr = "52" -datafusion-functions-aggregate = "52" +datafusion = { version = "53", default-features = false } +datafusion-physical-plan = "53" +datafusion-physical-expr = "53" +datafusion-execution = "53" +datafusion-common = "53" +datafusion-expr = "53" +datafusion-functions-aggregate = "53" -lance = { version = "4.0.0", default-features = false, features = ["aws"] } -lance-datafusion = "4.0.0" -lance-file = "4.0.0" -lance-index = "4.0.0" -lance-linalg = "4.0.0" -lance-namespace = "4.0.0" -lance-namespace-impls = "4.0.0" -lance-table = "4.0.0" +lance = { version = "6.0.1", 
default-features = false, features = ["aws"] } +lance-datafusion = "6.0.1" +lance-file = "6.0.1" +lance-index = "6.0.1" +lance-linalg = "6.0.1" +lance-namespace = "6.0.1" +lance-namespace-impls = "6.0.1" +lance-table = "6.0.1" ulid = "1" futures = "0.3" diff --git a/crates/omnigraph-cli/Cargo.toml b/crates/omnigraph-cli/Cargo.toml index 6441bd9..fb232eb 100644 --- a/crates/omnigraph-cli/Cargo.toml +++ b/crates/omnigraph-cli/Cargo.toml @@ -30,4 +30,5 @@ assert_cmd = "2" predicates = "3" serde_json = { workspace = true } tempfile = { workspace = true } +lance = { workspace = true } lance-index = { workspace = true } diff --git a/crates/omnigraph-cli/tests/cli.rs b/crates/omnigraph-cli/tests/cli.rs index 578d1bd..137f469 100644 --- a/crates/omnigraph-cli/tests/cli.rs +++ b/crates/omnigraph-cli/tests/cli.rs @@ -1,6 +1,6 @@ use std::fs; -use lance_index::traits::DatasetIndexExt; +use lance::index::DatasetIndexExt; use omnigraph::db::{Omnigraph, ReadTarget}; use serde_json::Value; use tempfile::tempdir; diff --git a/crates/omnigraph-server/Cargo.toml b/crates/omnigraph-server/Cargo.toml index 2c89ed4..b12ddfe 100644 --- a/crates/omnigraph-server/Cargo.toml +++ b/crates/omnigraph-server/Cargo.toml @@ -45,4 +45,5 @@ aws-sdk-secretsmanager = { version = "1", optional = true, default-features = fa tempfile = { workspace = true } tower = { workspace = true } serial_test = "3" +lance = { workspace = true } lance-index = { workspace = true } diff --git a/crates/omnigraph-server/tests/server.rs b/crates/omnigraph-server/tests/server.rs index e7b4458..bd77337 100644 --- a/crates/omnigraph-server/tests/server.rs +++ b/crates/omnigraph-server/tests/server.rs @@ -7,7 +7,7 @@ use axum::Router; use axum::body::{Body, to_bytes}; use axum::http::header::AUTHORIZATION; use axum::http::{Method, Request, StatusCode}; -use lance_index::traits::DatasetIndexExt; +use lance::index::DatasetIndexExt; use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions}; use omnigraph::error::OmniError; 
use omnigraph::loader::{LoadMode, load_jsonl}; diff --git a/crates/omnigraph/src/db/manifest/namespace.rs b/crates/omnigraph/src/db/manifest/namespace.rs index 724b3e5..80d206f 100644 --- a/crates/omnigraph/src/db/manifest/namespace.rs +++ b/crates/omnigraph/src/db/manifest/namespace.rs @@ -230,6 +230,11 @@ impl LanceNamespace for BranchManifestNamespace { metadata: None, properties: None, managed_versioning: Some(true), + // Every table we return from describe_table is physically + // materialized (open_manifest_dataset succeeds), never just + // "declared." See lance-namespace 6.0.1 DescribeTableResponse + // field docs. + is_only_declared: Some(false), }) } @@ -373,6 +378,11 @@ impl LanceNamespace for StagedTableNamespace { metadata: None, properties: None, managed_versioning: Some(true), + // Every table we return from describe_table is physically + // materialized (open_manifest_dataset succeeds), never just + // "declared." See lance-namespace 6.0.1 DescribeTableResponse + // field docs. + is_only_declared: Some(false), }) } diff --git a/crates/omnigraph/src/lib.rs b/crates/omnigraph/src/lib.rs index d781096..ff0b3d6 100644 --- a/crates/omnigraph/src/lib.rs +++ b/crates/omnigraph/src/lib.rs @@ -1,3 +1,12 @@ +// Lance 6's trait surface (heavier futures/streams nesting around the +// staged-write API in `storage_layer.rs`) pushes us past the default +// trait-resolution recursion limit of 128 on Linux builds. Raising to +// 256 here is the upstream-suggested fix from rustc itself +// ("consider increasing the recursion limit"). macOS happens to short- +// circuit before tripping the limit; CI on Linux does not. Revisit if +// future Lance bumps stop needing this. 
+#![recursion_limit = "256"] + pub mod changes; pub mod db; pub mod embedding; diff --git a/crates/omnigraph/src/storage_layer.rs b/crates/omnigraph/src/storage_layer.rs index 1efe940..b0fc042 100644 --- a/crates/omnigraph/src/storage_layer.rs +++ b/crates/omnigraph/src/storage_layer.rs @@ -10,11 +10,15 @@ //! ## Transitional residuals on the trait //! //! Several inline-commit methods remain on the trait surface as -//! documented residuals: `delete_where` (Lance 4.0.0's `DeleteJob` is -//! `pub(crate)` — see [#6658](https://github.com/lance-format/lance/issues/6658)), +//! documented residuals: `delete_where` +//! ([#6658](https://github.com/lance-format/lance/issues/6658) closed +//! 2026-05-14, but the public `DeleteBuilder::execute_uncommitted` API +//! did not backport to the 6.x release line — it first ships in +//! `v7.0.0-beta.10`. Migration to staged two-phase delete is tracked as +//! MR-A and is gated on the Lance v7.x bump, not the current v6.0.1 pin), //! `create_vector_index` (segment-commit-path requires //! `build_index_metadata_from_segments` which is `pub(crate)` — see -//! [#6666](https://github.com/lance-format/lance/issues/6666)), and the +//! [#6666](https://github.com/lance-format/lance/issues/6666), still open), and the //! legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` / //! `create_btree_index` / `create_inverted_index` paths kept while //! 
engine call sites finish migrating off of them (Phase 1b / Phase 9 diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index 9616e0d..c896b05 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -14,10 +14,11 @@ use lance::dataset::{ WriteParams, }; use lance::datatypes::BlobKind; +use lance::index::DatasetIndexExt; use lance::index::scalar::IndexDetails; use lance_file::version::LanceFileVersion; use lance_index::scalar::{InvertedIndexParams, ScalarIndexParams}; -use lance_index::{DatasetIndexExt, IndexType, is_system_index}; +use lance_index::{IndexType, is_system_index}; use lance_linalg::distance::MetricType; use lance_table::format::{Fragment, IndexMetadata, RowIdMeta}; use lance_table::rowids::{RowIdSequence, write_row_ids}; diff --git a/crates/omnigraph/tests/branching.rs b/crates/omnigraph/tests/branching.rs index 4d292f7..5a0c47d 100644 --- a/crates/omnigraph/tests/branching.rs +++ b/crates/omnigraph/tests/branching.rs @@ -4,7 +4,8 @@ use std::fs; use arrow_array::{Array, Int32Array, UInt64Array}; use futures::TryStreamExt; -use lance_index::{DatasetIndexExt, is_system_index}; +use lance::index::DatasetIndexExt; +use lance_index::is_system_index; use omnigraph::db::commit_graph::CommitGraph; use omnigraph::db::{MergeOutcome, Omnigraph, ReadTarget}; diff --git a/crates/omnigraph/tests/failpoints.rs b/crates/omnigraph/tests/failpoints.rs index e8de05e..a38f0bb 100644 --- a/crates/omnigraph/tests/failpoints.rs +++ b/crates/omnigraph/tests/failpoints.rs @@ -464,7 +464,7 @@ async fn recovery_rolls_forward_load_on_feature_branch() { #[tokio::test] async fn recovery_rolls_forward_ensure_indices_on_feature_branch() { - use lance_index::DatasetIndexExt; + use lance::index::DatasetIndexExt; use omnigraph::loader::{LoadMode, load_jsonl}; use omnigraph::table_store::TableStore; diff --git a/crates/omnigraph/tests/lance_surface_guards.rs b/crates/omnigraph/tests/lance_surface_guards.rs new file 
mode 100644 index 0000000..b65a808 --- /dev/null +++ b/crates/omnigraph/tests/lance_surface_guards.rs @@ -0,0 +1,244 @@ +//! Lance API surface guards. +//! +//! Each guard pins a Lance API surface that OmniGraph relies on. If a future +//! Lance bump silently renames a variant, restructures a public struct, or +//! flips a method to async, the corresponding guard either fails to compile +//! (compile-time guards) or fails at runtime (runtime guards). The purpose +//! is to turn silent-break risks into red CI bars on the *next* Lance bump, +//! rather than into wrong-state recovery in production. +//! +//! Pair this file with `docs/dev/lance.md`'s alignment audit stanza: any +//! Lance bump runs `cargo test -p omnigraph-engine --test lance_surface_guards` +//! first as the smoke check. +//! +//! ## Compile-only guards +//! +//! Functions prefixed with `_compile_` are gated with a broad `#[allow(...)]` +//! and never called. They exist to make `cargo build -p omnigraph-engine --tests` +//! enforce the API shape. Using `unimplemented!()` as a placeholder lets type +//! inference proceed without running anything. +//! +//! ## Runtime guards +//! +//! Functions decorated `#[tokio::test]` actually run; they construct real +//! values and assert field shapes / types. + +use std::sync::Arc; + +use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray}; +use arrow_schema::{DataType, Field, Schema}; +use lance::Dataset; +use lance::dataset::builder::DatasetBuilder; +use lance::dataset::optimize::{CompactionOptions, compact_files}; +use lance::dataset::write::delete::DeleteResult; +use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams}; +use lance_file::version::LanceFileVersion; +use lance_namespace::LanceNamespace; +use lance_table::io::commit::ManifestNamingScheme; + +/// Helper: build a small fresh dataset in a tempdir. 
Pinned at V2_2 to match
+/// production write paths (blob v2 requires V2_2; see `docs/dev/lance.md`).
+async fn fresh_dataset(uri: &str) -> Dataset {
+    // Two non-nullable columns: a Utf8 `id` and an Int32 `value`.
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Utf8, false),
+        Field::new("value", DataType::Int32, false),
+    ]));
+    // A single two-row batch is enough for the guards that need a real dataset.
+    let batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(StringArray::from(vec!["alice", "bob"])),
+            Arc::new(Int32Array::from(vec![1, 2])),
+        ],
+    )
+    .unwrap();
+    let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
+    // Create mode with stable row ids and an explicit V2_2 storage version;
+    // every other field keeps its default.
+    let params = WriteParams {
+        mode: WriteMode::Create,
+        enable_stable_row_ids: true,
+        data_storage_version: Some(LanceFileVersion::V2_2),
+        ..Default::default()
+    };
+    // Panics on any write failure — acceptable for a test helper.
+    Dataset::write(reader, uri, Some(params)).await.unwrap()
+}
+
+// --- Guard 1: LanceError::TooMuchWriteContention variant exists ------------
+//
+// `db/manifest/publisher.rs::map_lance_publish_error` pattern-matches on this
+// variant to surface typed `OmniError::ManifestRowLevelCasContention`. If
+// Lance renames the variant or removes the builder, this guard fails.
+
+#[tokio::test]
+async fn lance_error_too_much_write_contention_variant_exists() {
+    // Go through the public constructor so both the constructor name and the
+    // variant it produces are pinned.
+    let err = lance::Error::too_much_write_contention("guard");
+    assert!(
+        matches!(err, lance::Error::TooMuchWriteContention { .. }),
+        "Lance::Error::TooMuchWriteContention variant missing or renamed; \
+         update db/manifest/publisher.rs::map_lance_publish_error and \
+         this guard, then re-pin docs/dev/lance.md."
+    );
+}
+
+// --- Guard 2: ManifestLocation field shape ---------------------------------
+//
+// `db/manifest/metadata.rs:84-88` reads `.path`, `.size`, `.e_tag`,
+// `.naming_scheme` off `dataset.manifest_location()`. If any field renames
+// or changes type, this guard fails to compile.
+
+#[tokio::test]
+async fn manifest_location_field_shape() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().join("guard.lance");
+    let ds = fresh_dataset(uri.to_str().unwrap()).await;
+
+    let loc = ds.manifest_location();
+    // Explicit type bindings — these are the load-bearing assertions. If a
+    // type drifts (e.g. .size: Option<u64> → .size: u64), this fails to
+    // compile.
+    let _path: &object_store::path::Path = &loc.path;
+    let _size: Option<u64> = loc.size;
+    let _e_tag: Option<String> = loc.e_tag.clone();
+    let _scheme: ManifestNamingScheme = loc.naming_scheme;
+    // Runtime sanity — naming_scheme should produce a Debug string we use
+    // verbatim in `TableVersionMetadata::naming_scheme`.
+    assert!(!format!("{:?}", loc.naming_scheme).is_empty());
+}
+
+// --- Guard 3: checkout_version + restore async chain -----------------------
+//
+// `db/manifest/recovery.rs:505-522` chains `Dataset::open(...).await?
+// .checkout_version(N).await?.restore().await?` as the recovery rollback
+// hammer. Compile-only — never runs.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_checkout_version_then_restore_signature() -> lance::Result<()> {
+    let ds: Dataset = unimplemented!();
+    let mut ds: Dataset = ds.checkout_version(1u64).await?;
+    // `restore()` takes `&mut self` and returns `Result<()>`; the dataset
+    // mutates in place. If Lance flips this to return a fresh `Dataset`
+    // (consuming `self`), this guard fails to compile.
+    let _: () = ds.restore().await?;
+    Ok(())
+}
+
+// --- Guard 4: DatasetBuilder::from_namespace fluent chain ------------------
+//
+// `db/manifest/namespace.rs:162-174` chains
+// `DatasetBuilder::from_namespace(ns, vec![id]).await?.with_branch(...).with_version(...).load().await?`.
+// Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_dataset_builder_from_namespace_signature(
+    ns: Arc<dyn LanceNamespace>,
+) -> lance::Result<()> {
+    // Each step is re-bound with an explicit type so a change in any
+    // method's return type fails to compile.
+    let builder: DatasetBuilder =
+        DatasetBuilder::from_namespace(ns, vec!["table".to_string()]).await?;
+    let builder: DatasetBuilder = builder.with_branch("b", None);
+    let builder: DatasetBuilder = builder.with_version(1u64);
+    let _ds: Dataset = builder.load().await?;
+    Ok(())
+}
+
+// --- Guard 5: MergeInsertBuilder fluent chain ------------------------------
+//
+// `db/manifest/publisher.rs:370-391` is the manifest CAS. If any method on
+// the builder renames or changes signature, the publisher silently breaks.
+// Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_merge_insert_builder_method_chain() -> lance::Result<()> {
+    use lance::dataset::MergeStats;
+
+    // `unimplemented!()` only supplies a value of the right type for the
+    // compiler; this function is never called.
+    let ds: Arc<Dataset> = unimplemented!();
+    let job = MergeInsertBuilder::try_new(ds, vec!["object_id".to_string()])?
+        .when_matched(WhenMatched::UpdateAll)
+        .when_not_matched(WhenNotMatched::InsertAll)
+        .conflict_retries(0)
+        .use_index(false)
+        .try_build()?;
+
+    // execute_reader takes `impl StreamingWriteSource` (lance trait), which
+    // RecordBatchIterator implements. Pin the return shape
+    // `(Arc<Dataset>, MergeStats)` — the publisher's CAS loop depends on
+    // both: the new Dataset to advance HEAD, the stats for the audit row.
+    // NOTE(review): the iterator's item type below is spelled out with full
+    // paths; confirm it matches the `RecordBatchIterator` the publisher
+    // actually passes.
+    let source: RecordBatchIterator<
+        std::vec::IntoIter<Result<RecordBatch, arrow_schema::ArrowError>>,
+    > = unimplemented!();
+    let result: (Arc<Dataset>, MergeStats) = job.execute_reader(source).await?;
+    let _ds: Arc<Dataset> = result.0;
+    let _stats: MergeStats = result.1;
+    Ok(())
+}
+
+// --- Guard 6: WriteParams::default() leaves data_storage_version = None ----
+//
+// Our V2_2 pin is load-bearing for blob v2 (verified empirically:
+// V2_1 produced "Blob v2 requires file version >= 2.2" on 13 blob
+// tests).
If Lance changes the default to pin some version itself, audit
+// every `data_storage_version: Some(LanceFileVersion::V2_2)` site.
+
+#[test]
+fn write_params_default_does_not_set_storage_version() {
+    // Compare the field on a freshly defaulted `WriteParams` against `None`.
+    assert_eq!(
+        WriteParams::default().data_storage_version,
+        None,
+        "WriteParams::default().data_storage_version is no longer None; \
+         audit every explicit V2_2 pin (see rg 'LanceFileVersion::V2_2')."
+    );
+}
+
+// --- Guard 7: compact_files signature --------------------------------------
+//
+// The call site at `db/omnigraph/optimize.rs:107` is
+// `compact_files(&mut ds, options, None)`. Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_compact_files_signature() -> lance::Result<()> {
+    // Placeholder value: only the types matter, the body is never executed.
+    let mut dataset: Dataset = unimplemented!();
+    let compaction_opts: CompactionOptions = CompactionOptions::default();
+    // Same argument shape as the call site: `&mut Dataset`, options by value,
+    // and `None` for the third argument.
+    let _compaction_metrics = compact_files(&mut dataset, compaction_opts, None).await?;
+    Ok(())
+}
+
+// --- Guard 8: Dataset::delete returns DeleteResult { new_dataset, num_deleted_rows } ---
+//
+// Both fields are consumed by `table_store.rs::delete_where`. Once MR-A moves
+// `delete_where` to two-phase via `DeleteBuilder::execute_uncommitted`, this
+// guard updates to pin the staged path. Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_delete_result_field_shape() -> lance::Result<()> {
+    // `unimplemented!()` only supplies a value of the right type for the
+    // compiler; this function is never called.
+    let mut ds: Dataset = unimplemented!();
+    let result: DeleteResult = ds.delete("x = 1").await?;
+    // Explicit bindings pin both the field names and their types.
+    let _new_dataset: Arc<Dataset> = result.new_dataset;
+    let _num_deleted: u64 = result.num_deleted_rows;
+    Ok(())
+}
diff --git a/crates/omnigraph/tests/search.rs b/crates/omnigraph/tests/search.rs index a611a0f..c4454cf 100644 --- a/crates/omnigraph/tests/search.rs +++ b/crates/omnigraph/tests/search.rs @@ -3,7 +3,8 @@ mod helpers; use std::env; use arrow_array::{Array, StringArray}; -use lance_index::{DatasetIndexExt, is_system_index}; +use lance::index::DatasetIndexExt; +use lance_index::is_system_index; use serial_test::serial; use omnigraph::db::Omnigraph; diff --git a/docs/dev/lance.md b/docs/dev/lance.md index 713a7d7..4017dea 100644 --- a/docs/dev/lance.md +++ b/docs/dev/lance.md @@ -156,13 +156,26 @@ If a future need pulls one of these into scope, add a row to the matching domain When Lance ships a major release that changes any of the above (file format bump, new index type, transaction semantics change, new branching primitive), refresh this index in the same change as the omnigraph upgrade. Stale Lance pointers are worse than no pointers. -### Last alignment audit: 2026-05-02 (Lance 4.0.1 upstream; omnigraph pinned at 4.0.0) +### Last alignment audit: 2026-05-22 (Lance 6.0.1 upstream; omnigraph pinned at 6.0.1) -A full read-through of every index page above was performed in the MR-793 cycle. Findings (no code changes required for PR #70): +Migration from Lance 4.0.0 → 6.0.1 landed in this cycle (DataFusion 52 → 53, Arrow 57 → 58, lance-tokenizer 6.0.1 added, tantivy* removed). Direct 4 → 6 jump; v5.x was not used as an intermediate (rationale in `~/.claude/plans/shimmering-percolating-duckling.md`).
Behavior-affecting findings: -- The MemWAL "three sub-pages" (Overview / Details / Implementation) turned out to be **anchor sections on the single existing page** at `https://lance.org/format/table/mem_wal/` — not separate URLs. Findings: MemWAL is opt-in (requires an unenforced primary key + explicit shard config; omnigraph doesn't use it), operates intra-table (LSM-tree for streaming writes into one Lance table), and does NOT overlap with MR-847's cross-table manifest-vs-Lance-HEAD recovery problem. MR-847's design is unaffected. -- The distributed-indexing guide names Python APIs (`commit_existing_index_segments`, `merge_existing_index_segments`); the Rust analogues exist via `CreateIndexBuilder::execute_uncommitted` for scalar indices but **`build_index_metadata_from_segments` is `pub(crate)`** and blocks vector-index two-phase commits from outside the lance crate. Filed [lance-format/lance#6666](https://github.com/lance-format/lance/issues/6666) as a companion to [#6658](https://github.com/lance-format/lance/issues/6658). -- "Stable Row ID for Index" is documented as **experimental** in lance-4.0.x. Our datasets enable stable row IDs at the dataset level (`WriteParams::enable_stable_row_ids = true`); confirming whether our created indices opt into stable-row-id mode is a follow-up worth doing before MR-848 (index reconciler) lands. -- Fragment Reuse Index (FRI) is documented as one of three compaction strategies. omnigraph currently uses option 2 (immediate index rewrite at compaction time, via `omnigraph optimize`'s post-compaction rebuild). Adopting FRI is the explicit option for compaction-friendly index updates; relevant to MR-848. +- **DatasetIndexExt moved** from `lance-index` to `lance::index` (Lance PR #6280, v5.0). Six import sites updated. `lance-index::IndexType` and `lance-index::is_system_index` stayed in `lance-index`. `omnigraph-cli` and `omnigraph-server` gained `lance = { workspace = true }` in their dev-dependencies. 
+- **`DescribeTableResponse` gained `is_only_declared: Option<bool>`** (lance-namespace 6.0+, v5.0 PR #6186). Set to `Some(false)` in both `BranchManifestNamespace::describe_table` and `StagedTableNamespace::describe_table` — every table we return is physically materialized via `Dataset::open`, never "declared-only." +- **`MergeInsertBuilder` execute_reader return shape preserved** `(Arc<Dataset>, MergeStats)`; the publisher CAS chain at `db/manifest/publisher.rs:370-391` works unchanged. Pinned by `tests/lance_surface_guards.rs::_compile_merge_insert_builder_method_chain`. +- **`LanceError::TooMuchWriteContention` variant retained** in v6.0.1 (no rename). The typed publisher translation at `db/manifest/publisher.rs:417-430` continues to apply. Pinned by `lance_surface_guards.rs::lance_error_too_much_write_contention_variant_exists`. +- **`ManifestLocation` field shape stable**: `.path: object_store::path::Path`, `.size: Option<u64>`, `.e_tag: Option<String>`, `.naming_scheme: ManifestNamingScheme`. Pinned by `lance_surface_guards.rs::manifest_location_field_shape`. +- **`LanceFileVersion::default()` flipped V2_0 → V2_1** (v5.0). No effect on creates/appends — every `data_storage_version` callsite explicitly pins `Some(LanceFileVersion::V2_2)` (load-bearing for blob v2: `Blob v2 requires file version >= 2.2` enforced in `lance/src/dataset/write.rs:748`). The one path that does pick up the new default is `compact_files`, which builds its own `WriteParams` (see the `compact_files` bullet below). +- **`Dataset::checkout_version(N).await?.restore().await?`**: `restore()` takes `&mut self` and returns `Result<()>` (mutates in place, does not consume + return a new dataset). The recovery rollback hammer at `db/manifest/recovery.rs:505-522` continues to work. Pinned by `lance_surface_guards.rs::_compile_checkout_version_then_restore_signature`. +- **`DatasetBuilder::from_namespace(...).with_branch(...).with_version(...).load()`** surface preserved (the namespace builder chain at `db/manifest/namespace.rs:162-174`). Pinned by `lance_surface_guards.rs::_compile_dataset_builder_from_namespace_signature`.
+- **`compact_files(&mut ds, CompactionOptions::default(), None)`** signature stable. Pinned by `lance_surface_guards.rs::_compile_compact_files_signature`. `CompactionOptions` still does not expose `data_storage_version`; `compact_files` builds its own `WriteParams { ..Default::default() }`. Note: `LanceFileVersion::default()` is now V2_1 in v6, so optimize-rewritten fragments come out at V2_1 by default (was V2_0 in v4). Existing explicit V2_2 pins on creates/appends still apply. +- **`Dataset::delete(predicate)` returns `DeleteResult { new_dataset: Arc<Dataset>, num_deleted_rows: u64 }`** — unchanged shape. Pinned by `lance_surface_guards.rs::_compile_delete_result_field_shape`. MR-A will repurpose this guard to the staged two-phase variant once the `DeleteBuilder::execute_uncommitted` migration lands. +- **File reader read methods now async** (Lance PR #6710, v6.0). No effect — omnigraph reaches Lance exclusively through `Dataset::scan` and the staged-write API. +- **Tokenizer vendored as `lance-tokenizer`** (Lance PR #6512, v6.0). No effect — no direct tokenizer imports. +- **Lance #6658 closed** (2026-05-14) but `DeleteBuilder::execute_uncommitted` did **not** ship in v6.0.1 — binary search across the release stream shows it first appears in `v7.0.0-beta.10` (the closing commits landed on main but were not backported to the 6.x line). Tracked as MR-A: migrate `delete_where` to staged, retire the parse-time D2 mutation rule, extend recovery sidecar coverage. **Gated on the Lance v7.x bump**, not this PR. v7.0.0-rc.1 was released 2026-05-21. +- **Lance #6666 still open** (`build_index_metadata_from_segments` public): vector-index two-phase blocked; inline `create_vector_index` residual retained. +- **Lance #6877 still open** (`MergeInsertBuilder` dup-rowid): PR #109's `SourceDedupeBehavior::FirstSeen` + `check_batch_unique_by_keys` precondition stay load-bearing. + +Surface guards added: `crates/omnigraph/tests/lance_surface_guards.rs` (8 named guards; 3 runtime + 5 compile-only). Future Lance bumps re-run this file first as the smoke check.
Two additional guards from the original plan were deferred to a follow-up (`manifest_cas_returns_row_level_contention_variant` needs a full publisher-race harness; `table_version_metadata_byte_compatible_with_v4` needs a `pub(crate)` reach extension). Bump this date stanza on the next alignment pass.