Add ANN search support for vec0 virtual table

Add approximate nearest-neighbor (ANN) infrastructure to vec0: a shared distance
dispatch (vec0_distance_full), a flat index type with its parser, NEON-optimized
cosine/Hamming for float32/int8, an amalgamation script, and a benchmark suite
(benchmarks-ann/) with ground-truth generation and profiling tools. Also remove
the unused vec_npy_each/vec_static_blobs code and fix a missing stdint.h include.
This commit is contained in:
Alex Garcia 2026-03-29 19:44:44 -07:00
parent dfd8dc5290
commit bf2455f2ba
27 changed files with 2177 additions and 2116 deletions

View file

@ -1,6 +1,5 @@
from typing import List
from struct import pack
from sqlite3 import Connection
def serialize_float32(vector: List[float]) -> bytes:
@ -13,33 +12,3 @@ def serialize_int8(vector: List[int]) -> bytes:
return pack("%sb" % len(vector), *vector)
try:
    import numpy.typing as npt

    def register_numpy(db: Connection, name: str, array: npt.NDArray):
        """Expose a NumPy array to SQLite as a virtual table called ``name``.

        The buffer address, element type and shape are read from
        ``array.__array_interface__`` and handed to
        ``vec_static_blob_from_raw``; the result is stored in
        ``temp.vec_static_blobs`` under ``name``. A virtual table is then
        created with the ``vec_static_blob_entries`` module.

        Only the integer buffer address is passed to SQLite; nothing in this
        function keeps a reference to ``array``.
        NOTE(review): presumably the caller must keep ``array`` alive and
        unmoved for as long as the table is used -- confirm against the
        extension.

        Args:
            db: Connection on which the statements are executed.
            name: Key in ``temp.vec_static_blobs`` and name of the new table.
            array: Two-dimensional array whose ``typestr`` is ``"<f4"``.

        Raises:
            ValueError: If the array is not two-dimensional or its element
                type is not ``"<f4"``.
        """
        interface = array.__array_interface__

        shape = interface["shape"]
        if len(shape) != 2:
            raise ValueError(
                "register_numpy expects a 2-dimensional array, got shape %r"
                % (shape,)
            )
        nvectors, dimensions = shape

        element_type = interface["typestr"]
        # Explicit check rather than `assert`: asserts vanish under
        # `python -O`, and a raw pointer is about to be handed to SQLite.
        if element_type != "<f4":
            raise ValueError(
                "register_numpy expects element type '<f4', got %r"
                % (element_type,)
            )

        ptr = interface["data"][0]

        # printf('%w') escapes embedded double quotes so the name can be
        # spliced into the CREATE VIRTUAL TABLE statement as an identifier.
        name_escaped = db.execute("select printf('%w', ?)", [name]).fetchone()[0]

        db.execute(
            """
            insert into temp.vec_static_blobs(name, data)
            select ?, vec_static_blob_from_raw(?, ?, ?, ?)
            """,
            [name, ptr, element_type, dimensions, nvectors],
        )

        db.execute(
            f'create virtual table "{name_escaped}" using vec_static_blob_entries({name_escaped})'
        )

except ImportError:

    def register_numpy(db: Connection, name: str, array):
        """Fallback used when numpy cannot be imported; always raises."""
        raise ImportError("numpy package is required for register_numpy")