Add ANN search support for vec0 virtual table (#273)

Add approximate nearest neighbor infrastructure to vec0: shared distance
dispatch (vec0_distance_full), flat index type with parser, NEON-optimized
cosine/Hamming for float32/int8, amalgamation script, and benchmark suite
(benchmarks-ann/) with ground-truth generation and profiling tools. Remove
unused vec_npy_each/vec_static_blobs code, fix missing stdint.h include.
This commit is contained in:
Alex Garcia 2026-03-31 01:03:32 -07:00 committed by GitHub
parent e9f598abfa
commit 0de765f457
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 2177 additions and 2116 deletions

View file

@@ -48,7 +48,6 @@ import json
db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
db.load_extension("../../dist/vec0")
db.execute("select load_extension('../../dist/vec0', 'sqlite3_vec_fs_read_init')")
db.enable_load_extension(False)
results = db.execute(
@@ -75,17 +74,21 @@ print(b)
db.execute('PRAGMA page_size=16384')
print("Loading into sqlite-vec vec0 table...")
t0 = time.time()
db.execute("create virtual table v using vec0(a float[3072], chunk_size=16)")
db.execute('insert into v select rowid, vector from vec_npy_each(vec_npy_file("dbpedia_openai_3_large_00.npy"))')
print(time.time() - t0)
print("loading numpy array...")
t0 = time.time()
base = np.load('dbpedia_openai_3_large_00.npy')
print(time.time() - t0)
print("Loading into sqlite-vec vec0 table...")
t0 = time.time()
db.execute("create virtual table v using vec0(a float[3072], chunk_size=16)")
with db:
db.executemany(
"insert into v(rowid, a) values (?, ?)",
[(i, row.tobytes()) for i, row in enumerate(base)],
)
print(time.time() - t0)
np.random.seed(1)
queries = base[np.random.choice(base.shape[0], 20, replace=False), :]