Remove vec_npy_each from default entrypoint and move to sqlite3_vec_numpy_init entrypoint

This commit is contained in:
Alex Garcia 2024-09-25 23:07:17 -07:00
parent 70dce09747
commit 763aad5d6a
6 changed files with 92 additions and 187 deletions

View file

@ -119,7 +119,6 @@ FUNCTIONS = [
MODULES = [
"vec0",
"vec_each",
"vec_npy_each",
# "vec_static_blob_entries",
# "vec_static_blobs",
]
@ -1619,6 +1618,7 @@ def to_npy(arr):
def test_vec_npy_each():
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
vec_npy_each = lambda *args: execute_all(
db, "select rowid, * from vec_npy_each(?)", args
)
@ -1651,6 +1651,7 @@ def test_vec_npy_each():
def test_vec_npy_each_errors():
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
vec_npy_each = lambda *args: execute_all(
db, "select rowid, * from vec_npy_each(?)", args
)
@ -1769,7 +1770,7 @@ import tempfile
def test_vec_npy_each_errors_files():
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_fs_read_init")
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
def vec_npy_each(data):
with tempfile.NamedTemporaryFile(delete_on_close=False) as f:
@ -2274,36 +2275,42 @@ def test_smoke():
db.execute("insert into vec_xyz(rowid, a) select 2, X'0000000000000040'")
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
assert chunk[
"rowids"
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + bytearray(
int(1024 * 8) - 8 * 2
assert (
chunk["rowids"]
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
+ bytearray(int(1024 * 8) - 8 * 2)
)
assert chunk["chunk_id"] == 1
assert chunk["validity"] == b"\x03" + bytearray(int(1024 / 8) - 1)
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
assert vchunk["rowid"] == 1
assert vchunk[
"vectors"
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + bytearray(
int(1024 * 4 * 2) - (2 * 4 * 2)
assert (
vchunk["vectors"]
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 2))
)
db.execute("insert into vec_xyz(rowid, a) select 3, X'00000000000080bf'")
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
assert chunk["chunk_id"] == 1
assert chunk["validity"] == b"\x07" + bytearray(int(1024 / 8) - 1)
assert chunk[
"rowids"
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + b"\x03\x00\x00\x00\x00\x00\x00\x00" + bytearray(
int(1024 * 8) - 8 * 3
assert (
chunk["rowids"]
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
+ b"\x03\x00\x00\x00\x00\x00\x00\x00"
+ bytearray(int(1024 * 8) - 8 * 3)
)
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
assert vchunk["rowid"] == 1
assert vchunk[
"vectors"
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + b"\x00\x00\x00\x00\x00\x00\x80\xbf" + bytearray(
int(1024 * 4 * 2) - (2 * 4 * 3)
assert (
vchunk["vectors"]
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
+ b"\x00\x00\x00\x00\x00\x00\x80\xbf"
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 3))
)
# db.execute("select * from vec_xyz")
@ -2346,66 +2353,63 @@ def test_vec0_stress_small_chunks():
{"rowid": 994, "a": _f32([99.4] * 8)},
{"rowid": 993, "a": _f32([99.3] * 8)},
]
assert (
execute_all(
db,
"""
assert execute_all(
db,
"""
select rowid, a, distance
from vec_small
where a match ?
and k = 9
order by distance
""",
[_f32([50.0] * 8)],
)
== [
{
"a": _f32([500 * 0.1] * 8),
"distance": 0.0,
"rowid": 500,
},
{
"a": _f32([501 * 0.1] * 8),
"distance": 0.2828384041786194,
"rowid": 501,
},
{
"a": _f32([499 * 0.1] * 8),
"distance": 0.2828384041786194,
"rowid": 499,
},
{
"a": _f32([502 * 0.1] * 8),
"distance": 0.5656875967979431,
"rowid": 502,
},
{
"a": _f32([498 * 0.1] * 8),
"distance": 0.5656875967979431,
"rowid": 498,
},
{
"a": _f32([503 * 0.1] * 8),
"distance": 0.8485260009765625,
"rowid": 503,
},
{
"a": _f32([497 * 0.1] * 8),
"distance": 0.8485260009765625,
"rowid": 497,
},
{
"a": _f32([496 * 0.1] * 8),
"distance": 1.1313751935958862,
"rowid": 496,
},
{
"a": _f32([504 * 0.1] * 8),
"distance": 1.1313751935958862,
"rowid": 504,
},
]
)
[_f32([50.0] * 8)],
) == [
{
"a": _f32([500 * 0.1] * 8),
"distance": 0.0,
"rowid": 500,
},
{
"a": _f32([501 * 0.1] * 8),
"distance": 0.2828384041786194,
"rowid": 501,
},
{
"a": _f32([499 * 0.1] * 8),
"distance": 0.2828384041786194,
"rowid": 499,
},
{
"a": _f32([502 * 0.1] * 8),
"distance": 0.5656875967979431,
"rowid": 502,
},
{
"a": _f32([498 * 0.1] * 8),
"distance": 0.5656875967979431,
"rowid": 498,
},
{
"a": _f32([503 * 0.1] * 8),
"distance": 0.8485260009765625,
"rowid": 503,
},
{
"a": _f32([497 * 0.1] * 8),
"distance": 0.8485260009765625,
"rowid": 497,
},
{
"a": _f32([496 * 0.1] * 8),
"distance": 1.1313751935958862,
"rowid": 496,
},
{
"a": _f32([504 * 0.1] * 8),
"distance": 1.1313751935958862,
"rowid": 504,
},
]
def test_vec0_distance_metric():