Add rescore index for ANN queries

Add a rescore index type: it stores full-precision float vectors in a
rowid-keyed shadow table, quantizes them to int8 for a fast initial scan
(the benchmark configs also exercise quantizer=bit), then rescores the top
candidates with the original vectors. Includes the config parser, shadow
table management, insert/delete support, KNN integration, a compile flag
(SQLITE_VEC_ENABLE_RESCORE), fuzz targets, and tests.
This commit is contained in:
Alex Garcia 2026-03-29 19:45:54 -07:00
parent bf2455f2ba
commit ba0db0b6d6
19 changed files with 3378 additions and 8 deletions

View file

@ -21,9 +21,14 @@ BASELINES = \
# ANNOY_CONFIGS = \
# "annoy-t50:type=annoy,n_trees=50"
ALL_CONFIGS = $(BASELINES)
# Benchmark configurations for the rescore index type. Each entry is a quoted
# "<name>:type=rescore,quantizer=<bit|int8>,oversample=<N>" string that is
# handed to $(BENCH) unchanged as a command-line argument.
# NOTE(review): oversample is presumably the candidate multiplier applied
# before rescoring -- confirm against the rescore config parser.
RESCORE_CONFIGS = \
"rescore-bit-os8:type=rescore,quantizer=bit,oversample=8" \
"rescore-bit-os16:type=rescore,quantizer=bit,oversample=16" \
"rescore-int8-os8:type=rescore,quantizer=int8,oversample=8"
.PHONY: seed ground-truth bench-smoke bench-10k bench-50k bench-100k bench-all \
ALL_CONFIGS = $(BASELINES) $(RESCORE_CONFIGS)
.PHONY: seed ground-truth bench-smoke bench-rescore bench-10k bench-50k bench-100k bench-all \
report clean
# --- Data preparation ---
@ -40,6 +45,10 @@ bench-smoke: seed
$(BENCH) --subset-size 5000 -k 10 -n 20 -o runs/smoke \
$(BASELINES)
# bench-rescore: after `seed`, invoke $(BENCH) on $(RESCORE_CONFIGS) only,
# with --subset-size 10000, -k 10 and -o runs/rescore. Unlike bench-smoke,
# no -n value is passed here.
bench-rescore: seed
$(BENCH) --subset-size 10000 -k 10 -o runs/rescore \
$(RESCORE_CONFIGS)
# --- Standard sizes ---
# bench-10k: after `seed`, invoke $(BENCH) with --subset-size 10000 and -k 10
# on every entry in $(ALL_CONFIGS), using -o runs/10k.
bench-10k: seed
$(BENCH) --subset-size 10000 -k 10 -o runs/10k $(ALL_CONFIGS)