mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Add comprehensive ANN benchmarking suite
Extend benchmarks-ann/ with results database (SQLite with per-query detail and continuous writes), dataset subfolder organization, --subset-size and --warmup options. Supports systematic comparison across flat, rescore, IVF, and DiskANN index types.
This commit is contained in:
parent
a248ecd061
commit
dbbb4b98f7
26 changed files with 2127 additions and 292 deletions
30
benchmarks-ann/datasets/nyt/Makefile
Normal file
30
benchmarks-ann/datasets/nyt/Makefile
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# User-tunable knobs; override on the command line, e.g. `make K=10`.
# MODEL, K and BATCH_SIZE are forwarded to build-base.py as --model, --k and
# --batch-size. DATA_DIR is the directory the Kaggle download is unpacked
# into and that build-contents.py reads via --data-dir.
MODEL ?= minishlab/potion-base-8M
K ?= 100
BATCH_SIZE ?= 512
DATA_DIR ?= data

# If a recipe fails, delete its target so a half-written .db file is never
# mistaken for an up-to-date build product on the next run.
.DELETE_ON_ERROR:

# Default goal: build both databases.
all: base.db contents.db
|
||||
|
||||
# Fetch the NYT headlines CSVs from Kaggle and unpack them into the target
# directory. Needs the `kaggle` CLI and a configured API token.
$(DATA_DIR):
	kaggle datasets download -d johnbandy/new-york-times-headlines -p $@ --unzip
|
||||
|
||||
# Build contents.db by running build-contents.py over the downloaded data.
# $(DATA_DIR) is an order-only prerequisite (after `|`): it must exist before
# this recipe runs, but its timestamp is ignored. A directory's mtime changes
# whenever an entry is added or removed inside it, which as a normal
# prerequisite would trigger needless rebuilds of contents.db and base.db.
contents.db: | $(DATA_DIR)
	uv run build-contents.py --data-dir $(DATA_DIR) -o $@
|
||||
|
||||
# Build base.db by running build-base.py. The first prerequisite ($<) is
# passed as --contents-db and the second ($(word 2,$^)) as --queries-file;
# MODEL, BATCH_SIZE and K are forwarded as --model, --batch-size and --k.
base.db: contents.db queries.txt
	uv run build-base.py \
		--contents-db $< \
		--model $(MODEL) \
		--queries-file $(word 2,$^) \
		--batch-size $(BATCH_SIZE) \
		--k $(K) \
		-o $@
|
||||
|
||||
# Delete the generated databases; the downloaded data directory is kept.
# $(RM) is Make's built-in variable, `rm -f` by default.
clean:
	$(RM) base.db contents.db
|
||||
|
||||
# Everything `clean` removes, plus the downloaded dataset directory.
clean-all: clean
	$(RM) -r $(DATA_DIR)

# These targets name actions rather than files, so they must always run even
# if a file with the same name exists.
.PHONY: all clean clean-all
|
||||
Loading…
Add table
Add a link
Reference in a new issue