Add ANN search support for vec0 virtual table (#273)

Add approximate nearest neighbor infrastructure to vec0: shared distance
dispatch (vec0_distance_full), flat index type with parser, NEON-optimized
cosine/Hamming for float32/int8, amalgamation script, and benchmark suite
(benchmarks-ann/) with ground-truth generation and profiling tools. Remove
unused vec_npy_each/vec_static_blobs code, fix missing stdint.h include.
This commit is contained in:
Alex Garcia 2026-03-31 01:03:32 -07:00 committed by GitHub
parent e9f598abfa
commit 0de765f457
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 2177 additions and 2116 deletions

35
benchmarks-ann/schema.sql Normal file
View file

@ -0,0 +1,35 @@
-- Canonical results schema for vec0 KNN benchmark comparisons.
-- The index_type column is a free-form TEXT field. Baseline configs use
-- "baseline"; index-specific branches add their own types (registered
-- via INDEX_REGISTRY in bench.py).
-- build_results: metrics recorded when a benchmark database is built.
-- The primary key allows one row per (config_name, subset_size) pair.
-- NOTE(review): the _s / _mb column suffixes suggest seconds / megabytes;
-- confirm against the code that writes these rows.
CREATE TABLE IF NOT EXISTS build_results (
    -- Identity of the run.
    config_name    TEXT    NOT NULL,
    index_type     TEXT    NOT NULL,
    subset_size    INTEGER NOT NULL,
    db_path        TEXT    NOT NULL,

    -- Timings.
    insert_time_s  REAL    NOT NULL,
    train_time_s   REAL,             -- nullable: NULL when no training/build step is needed
    total_time_s   REAL    NOT NULL,

    -- Size of the resulting database.
    rows           INTEGER NOT NULL,
    file_size_mb   REAL    NOT NULL,

    -- Filled in at insert time by SQLite's datetime('now') when omitted.
    created_at     TEXT    NOT NULL DEFAULT (datetime('now')),

    PRIMARY KEY (config_name, subset_size)
);
-- bench_results: query-benchmark measurements.
-- The primary key allows one row per (config_name, subset_size, k) triple.
-- NOTE(review): the _ms column suffix suggests milliseconds; k is presumably
-- the KNN neighbour count and n the number of queries measured; confirm
-- against the code that writes these rows.
CREATE TABLE IF NOT EXISTS bench_results (
    -- Identity of the run.
    config_name  TEXT    NOT NULL,
    index_type   TEXT    NOT NULL,
    subset_size  INTEGER NOT NULL,
    k            INTEGER NOT NULL,
    n            INTEGER NOT NULL,

    -- Latency statistics.
    mean_ms      REAL    NOT NULL,
    median_ms    REAL    NOT NULL,
    p99_ms       REAL    NOT NULL,
    total_ms     REAL    NOT NULL,

    -- Throughput and result quality.
    qps          REAL    NOT NULL,
    recall       REAL    NOT NULL,  -- TODO confirm scale (fraction vs. percent)

    db_path      TEXT    NOT NULL,

    -- Filled in at insert time by SQLite's datetime('now') when omitted.
    created_at   TEXT    NOT NULL DEFAULT (datetime('now')),

    PRIMARY KEY (config_name, subset_size, k)
);