mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Add comprehensive ANN benchmarking suite
Extend benchmarks-ann/ with results database (SQLite with per-query detail and continuous writes), dataset subfolder organization, --subset-size and --warmup options. Supports systematic comparison across flat, rescore, IVF, and DiskANN index types.
This commit is contained in:
parent
a248ecd061
commit
dbbb4b98f7
26 changed files with 2127 additions and 292 deletions
27
benchmarks-ann/datasets/cohere10m/Makefile
Normal file
27
benchmarks-ann/datasets/cohere10m/Makefile
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Remote directory that hosts the dataset's parquet files.
BASE_URL := https://assets.zilliz.com/benchmark/cohere_large_10m

# The ten training shards: train-00-of-10.parquet ... train-09-of-10.parquet.
# Built with $(foreach) and assigned with := so the list is computed once at
# parse time, without forking a shell on every expansion.
TRAIN_PARQUETS := $(foreach shard,0 1 2 3 4 5 6 7 8 9,train-0$(shard)-of-10.parquet)

# The remaining files, which are not sharded.
OTHER_PARQUETS := test.parquet neighbors.parquet

# Every parquet file the download and base.db targets depend on.
PARQUETS := $(TRAIN_PARQUETS) $(OTHER_PARQUETS)
|
||||
|
||||
# Command-style targets that do not correspond to files on disk.
.PHONY: all download clean

# Remove a target whose recipe exits non-zero, so a failed download or a
# failed database build is not mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:

# Default goal: build base.db.
all: base.db

# Fetch every file listed in PARQUETS. Each file has its own rule, so the
# downloads can run in parallel, e.g.:
#   make -j12 download
download: $(PARQUETS)
|
||||
|
||||
# Download one training shard; the target name doubles as the remote file name.
#   --fail     exit non-zero on an HTTP error instead of saving the error body
#              as if it were the parquet file
#   --location follow redirects (as the original -L did)
# The transfer goes to $@.part and is renamed only after curl succeeds, so an
# interrupted or failed download never leaves a truncated file under the
# target's name. A leftover $@.part is overwritten by the next attempt.
train-%-of-10.parquet:
	curl --fail --location --output $@.part $(BASE_URL)/$@
	mv -f $@.part $@
|
||||
|
||||
# Download the two unsharded files. Listing both targets on one rule header
# gives each the same recipe, and $@ selects the file being fetched.
#   --fail     exit non-zero on an HTTP error instead of saving the error body
#              as if it were the parquet file
#   --location follow redirects (as the original -L did)
# The transfer goes to $@.part and is renamed only after curl succeeds, so an
# interrupted or failed download never leaves a truncated file under the
# target's name. A leftover $@.part is overwritten by the next attempt.
test.parquet neighbors.parquet:
	curl --fail --location --output $@.part $(BASE_URL)/$@
	mv -f $@.part $@
|
||||
|
||||
# Build base.db by running build_base_db.py under uv, with pandas and pyarrow
# supplied for that run. The script is a prerequisite alongside the parquet
# files, so editing it also triggers a rebuild.
# NOTE(review): the script is presumably what writes base.db into this
# directory; confirm against build_base_db.py.
base.db: $(PARQUETS) build_base_db.py
	uv run \
		--with pandas \
		--with pyarrow \
		python build_base_db.py
|
||||
|
||||
# Remove the generated database only; the downloaded parquet files are not
# touched by this target.
clean:
	$(RM) base.db
|
||||
Loading…
Add table
Add a link
Reference in a new issue