mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Add comprehensive ANN benchmarking suite (#279)
Extend benchmarks-ann/ with results database (SQLite with per-query detail and continuous writes), dataset subfolder organization, --subset-size and --warmup options. Supports systematic comparison across flat, rescore, IVF, and DiskANN index types.
This commit is contained in:
parent
a248ecd061
commit
8544081a67
26 changed files with 2127 additions and 292 deletions
76
benchmarks-ann/results_schema.sql
Normal file
76
benchmarks-ann/results_schema.sql
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
-- Comprehensive results schema for vec0 KNN benchmark runs.
|
||||
-- Created in WAL mode: PRAGMA journal_mode=WAL
|
||||
|
||||
-- runs: one row per benchmark run, holding its configuration, dataset
-- selection, and lifecycle status.
CREATE TABLE IF NOT EXISTS runs (
    run_id        INTEGER PRIMARY KEY AUTOINCREMENT,
    config_name   TEXT    NOT NULL,
    index_type    TEXT    NOT NULL,

    -- JSON object of index parameters, e.g. {"R":48,"L":128,"quantizer":"binary"}
    params        TEXT    NOT NULL,

    -- dataset name, e.g. "cohere1m"
    dataset       TEXT    NOT NULL,
    subset_size   INTEGER NOT NULL,
    k             INTEGER NOT NULL,
    n_queries     INTEGER NOT NULL,

    -- one of 'build', 'query', or 'both'
    phase         TEXT    NOT NULL DEFAULT 'both',

    -- lifecycle: pending → inserting → training → querying → done | built | error
    status        TEXT    NOT NULL DEFAULT 'pending',

    -- nanosecond timestamp taken from time.time_ns()
    created_at_ns INTEGER NOT NULL
);
|
||||
|
||||
-- run_results: at most one row per run (run_id is both the primary key and a
-- reference to runs), holding build timings, the SQL used, and query aggregates.
-- Every column other than run_id is nullable.
CREATE TABLE IF NOT EXISTS run_results (
    run_id             INTEGER PRIMARY KEY,

    -- insert phase timing
    insert_started_ns  INTEGER,
    insert_ended_ns    INTEGER,
    insert_duration_ns INTEGER,

    -- training phase timing; train_started_ns is NULL if no training
    train_started_ns   INTEGER,
    train_ended_ns     INTEGER,
    train_duration_ns  INTEGER,

    -- insert + train
    build_duration_ns  INTEGER,

    db_file_size_bytes INTEGER,
    db_file_path       TEXT,

    -- CREATE VIRTUAL TABLE ...
    create_sql         TEXT,
    -- INSERT INTO vec_items ...
    insert_sql         TEXT,
    -- NULL if no training step
    train_sql          TEXT,
    -- SELECT ... WHERE embedding MATCH ...
    query_sql          TEXT,

    -- denormalized from runs for easy filtering
    k                  INTEGER,

    -- denormalized aggregates
    query_mean_ms      REAL,
    query_median_ms    REAL,
    query_p99_ms       REAL,
    query_total_ms     REAL,
    qps                REAL,
    recall             REAL,

    FOREIGN KEY (run_id) REFERENCES runs(run_id)
);
|
||||
|
||||
-- insert_batches: one row per recorded insert batch of a run, with the batch's
-- index range, timing, and running totals.
CREATE TABLE IF NOT EXISTS insert_batches (
    batch_id        INTEGER PRIMARY KEY AUTOINCREMENT,
    run_id          INTEGER NOT NULL,

    -- half-open index range [batch_lo, batch_hi)
    batch_lo        INTEGER NOT NULL,   -- start index (inclusive)
    batch_hi        INTEGER NOT NULL,   -- end index (exclusive)
    rows_in_batch   INTEGER NOT NULL,

    started_ns      INTEGER NOT NULL,
    ended_ns        INTEGER NOT NULL,
    duration_ns     INTEGER NOT NULL,

    -- total rows inserted so far
    cumulative_rows INTEGER NOT NULL,
    -- cumulative rate
    rate_rows_per_s REAL    NOT NULL,

    FOREIGN KEY (run_id) REFERENCES runs(run_id)
);
|
||||
|
||||
-- queries: per-query detail for a run. A (run_id, k, query_vector_id)
-- combination may appear only once.
CREATE TABLE IF NOT EXISTS queries (
    query_id         INTEGER PRIMARY KEY AUTOINCREMENT,
    run_id           INTEGER NOT NULL,
    k                INTEGER NOT NULL,
    query_vector_id  INTEGER NOT NULL,

    started_ns       INTEGER NOT NULL,
    ended_ns         INTEGER NOT NULL,
    duration_ms      REAL    NOT NULL,

    -- each of the following three columns holds a JSON array
    result_ids       TEXT    NOT NULL,
    result_distances TEXT    NOT NULL,
    ground_truth_ids TEXT    NOT NULL,

    recall           REAL    NOT NULL,

    UNIQUE (run_id, k, query_vector_id),
    FOREIGN KEY (run_id) REFERENCES runs(run_id)
);
|
||||
|
||||
-- Secondary indexes on runs (config_name, index_type, status) and on
-- insert_batches (run_id).
CREATE INDEX IF NOT EXISTS idx_runs_config ON runs(config_name);
CREATE INDEX IF NOT EXISTS idx_runs_type   ON runs(index_type);
CREATE INDEX IF NOT EXISTS idx_runs_status ON runs(status);
CREATE INDEX IF NOT EXISTS idx_batches_run ON insert_batches(run_id);

-- No dedicated run_id index is created on queries. The table's
-- UNIQUE(run_id, k, query_vector_id) constraint is backed by an automatic
-- index whose leading column is run_id, which SQLite can use for run_id
-- lookups. A separate queries(run_id) index would duplicate that coverage and
-- add write cost and file size on every inserted query row.
-- Databases created before this change keep their existing idx_queries_run;
-- it is harmless and can be removed with: DROP INDEX IF EXISTS idx_queries_run;
|
||||
Loading…
Add table
Add a link
Reference in a new issue