sqlite-vec/benchmarks-ann/datasets/nyt/Makefile
Alex Garcia 8544081a67
Add comprehensive ANN benchmarking suite (#279)
Extend benchmarks-ann/ with results database (SQLite with per-query detail
and continuous writes), dataset subfolder organization, --subset-size and
--warmup options. Supports systematic comparison across flat, rescore, IVF,
and DiskANN index types.
2026-03-31 01:29:49 -07:00

30 lines
658 B
Makefile

# User-tunable knobs. `?=` only assigns when the variable is not already set,
# so each can be overridden from the command line (e.g. `make K=10`) or the
# environment.
#   MODEL      - passed to build-base.py as --model
#   K          - passed to build-base.py as --k
#   BATCH_SIZE - passed to build-base.py as --batch-size
#   DATA_DIR   - directory the Kaggle download is unpacked into
MODEL ?= minishlab/potion-base-8M
K ?= 100
BATCH_SIZE ?= 512
DATA_DIR ?= data

# If a recipe fails, delete the target file it was writing, so a half-written
# .db is never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Default goal: build both .db outputs. contents.db is listed first only for
# readability; base.db already names it as a prerequisite.
all: contents.db base.db
# Fetch the NYT headlines CSVs from Kaggle and unzip them into $(DATA_DIR).
# Needs the `kaggle` CLI and an API token. The rule has no prerequisites, so
# the download runs only when the directory does not exist yet.
$(DATA_DIR):
	kaggle datasets download -d johnbandy/new-york-times-headlines -p $@ --unzip
# Build contents.db from the downloaded data via build-contents.py.
# $(DATA_DIR) is an order-only prerequisite (listed after `|`): it must exist
# before the recipe runs, but the directory's timestamp -- which changes
# whenever a file inside it is added or removed -- does not force a rebuild.
contents.db: | $(DATA_DIR)
	uv run build-contents.py --data-dir $(DATA_DIR) -o $@
# Build base.db by running build-base.py over contents.db and queries.txt.
# $< expands to contents.db and $(word 2,$^) to queries.txt; MODEL,
# BATCH_SIZE and K are forwarded unchanged as command-line flags.
base.db: contents.db queries.txt
	uv run build-base.py \
		--contents-db $< \
		--model $(MODEL) \
		--queries-file $(word 2,$^) \
		--batch-size $(BATCH_SIZE) \
		--k $(K) \
		-o $@
# Delete the two generated .db files; the downloaded data is kept.
clean:
	rm -f contents.db base.db
# Full reset: run `clean` first, then also delete the download directory.
clean-all: clean
	rm -rf $(DATA_DIR)
# These goals are commands rather than files, so their recipes always run
# even if a file with the same name exists.
.PHONY: all
.PHONY: clean clean-all