mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Add comprehensive ANN benchmarking suite
Extend benchmarks-ann/ with a results database (SQLite, with per-query detail and continuous writes), dataset subfolder organization, and the --subset-size and --warmup options. This supports systematic comparison across the flat, rescore, IVF, and DiskANN index types.
This commit is contained in:
parent
a248ecd061
commit
dbbb4b98f7
26 changed files with 2127 additions and 292 deletions
37
benchmarks-ann/datasets/nyt-768/Makefile
Normal file
37
benchmarks-ann/datasets/nyt-768/Makefile
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Tunable knobs. Each uses `?=`, so a value supplied from the environment or
# the command line (e.g. `make K=10`) takes precedence over the default here.

# Model name: passed to build-base.py as --model, and also the target of the
# distill rule.
MODEL      ?= bge-base-en-v1.5-768
# Forwarded to build-base.py as --k.
K          ?= 100
# Forwarded to build-base.py as --batch-size.
BATCH_SIZE ?= 512
# Input directory handed to build-contents.py via --data-dir.
DATA_DIR   ?= ../nyt/data
|
||||
|
||||
# Default goal (the first rule in this file): build base.db.
all: base.db
|
||||
|
||||
# The input data is shared with the sibling ../nyt dataset: when the data
# directory is missing, delegate to the `data` target of ../nyt.
$(DATA_DIR):
	$(MAKE) -C ../nyt data
|
||||
|
||||
# Produce the model by running distill-model.py. This is its own target
# because the step may take a while.
$(MODEL):
	uv run distill-model.py
|
||||
|
||||
# Build contents.db by running build-contents.py over $(DATA_DIR).
#
# $(DATA_DIR) is an order-only prerequisite (listed after the `|`): it must
# exist before the recipe runs, but its timestamp is ignored. A directory's
# mtime changes whenever an entry inside it is added or removed, so listing it
# as a normal prerequisite would trigger spurious rebuilds of contents.db.
contents.db: | $(DATA_DIR)
	uv run build-contents.py --data-dir $(DATA_DIR) -o $@
|
||||
|
||||
# Build base.db by running ../nyt/build-base.py with contents.db, the query
# list in queries.txt, and the model named by $(MODEL).
#
# contents.db and queries.txt are normal prerequisites: a newer copy of either
# rebuilds base.db. $(MODEL) is order-only (after the `|`): it must exist, but
# its timestamp is ignored. clean-all removes it with `rm -rf`, so it is
# presumably a directory, and a directory's mtime is not a reliable signal
# that its contents changed.
base.db: contents.db queries.txt | $(MODEL)
	uv run ../nyt/build-base.py \
		--contents-db contents.db \
		--model $(MODEL) \
		--queries-file queries.txt \
		--batch-size $(BATCH_SIZE) \
		--k $(K) \
		-o $@
|
||||
|
||||
# Copy the query list from the sibling ../nyt dataset.
#
# The source file is declared as a prerequisite so that make refreshes the
# local copy (and therefore anything depending on it) when the original is
# newer, instead of keeping a stale copy forever.
queries.txt: ../nyt/queries.txt
	cp $< $@
|
||||
|
||||
# Delete the two database files this Makefile builds.
clean:
	rm -f contents.db base.db
|
||||
|
||||
# Everything `clean` removes, plus whatever lives at $(MODEL).
clean-all: clean
	rm -fr $(MODEL)
|
||||
|
||||
# These targets name commands, not files; declaring them phony makes them run
# even if a file with the same name exists.
.PHONY: all clean clean-all
|
||||
Loading…
Add table
Add a link
Reference in a new issue