mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Extend benchmarks-ann/ with results database (SQLite with per-query detail and continuous writes), dataset subfolder organization, --subset-size and --warmup options. Supports systematic comparison across flat, rescore, IVF, and DiskANN index types.
30 lines
658 B
Makefile
30 lines
658 B
Makefile
# Remove a target file whose recipe exits with an error, so a failed or
# interrupted build cannot leave a partially written database behind that
# make would later treat as up to date.
.DELETE_ON_ERROR:

# User-tunable settings. Each one uses ?= so it can be overridden from the
# environment or the command line, e.g. `make K=10 DATA_DIR=/tmp/nyt`.

# Forwarded to build-base.py as --model
# (presumably the embedding model identifier; confirm against the script).
MODEL ?= minishlab/potion-base-8M
# Forwarded to build-base.py as --k.
K ?= 100
# Forwarded to build-base.py as --batch-size.
BATCH_SIZE ?= 512
# Directory the Kaggle download is unpacked into; build-contents.py reads it
# via --data-dir.
DATA_DIR ?= data

# Default goal: build both databases. They are listed in dependency order;
# base.db itself depends on contents.db, so contents.db is always built first.
all: contents.db base.db

# Fetch the NYT headlines CSVs (Kaggle dataset
# johnbandy/new-york-times-headlines) and unpack them into $(DATA_DIR).
# Requires the `kaggle` CLI and an API token.
# The rule has no prerequisites, so the recipe runs only while the directory
# does not exist yet.
$(DATA_DIR):
	kaggle datasets download -d johnbandy/new-york-times-headlines -p $@ --unzip

# Build contents.db by running build-contents.py over the files in $(DATA_DIR).
#
# The data directory is an order-only prerequisite (listed after the `|`): it
# must exist before the recipe runs, but its timestamp is ignored. A
# directory's mtime changes whenever an entry inside it is added or removed,
# so as a normal prerequisite it would force needless rebuilds of contents.db
# and, through it, base.db.
contents.db: | $(DATA_DIR)
	uv run build-contents.py --data-dir $(DATA_DIR) -o $@

# Build base.db with build-base.py from contents.db and queries.txt.
# MODEL, BATCH_SIZE and K are forwarded as --model, --batch-size and --k.
# $< expands to the first prerequisite, contents.db.
# No rule for queries.txt is visible here, so it is expected to exist already.
base.db: contents.db queries.txt
	uv run build-base.py --contents-db $< --model $(MODEL) \
		--queries-file queries.txt --batch-size $(BATCH_SIZE) \
		--k $(K) -o $@

# Delete the generated databases; downloaded data is left in place.
# $(RM) is make's predefined `rm -f`.
clean:
	$(RM) base.db contents.db

# Run `clean`, then also delete the downloaded dataset directory, so the next
# build downloads it from Kaggle again.
clean-all: clean
	$(RM) -r $(DATA_DIR)

# These targets name actions rather than files; declaring them phony keeps a
# stray file called `all`, `clean` or `clean-all` from suppressing them.
.PHONY: all
.PHONY: clean clean-all