mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 16:56:27 +02:00
Add rescore index for ANN queries
Add rescore index type: stores full-precision float vectors in a rowid-keyed shadow table, quantizes to int8 for fast initial scan, then rescores top candidates with original vectors. Includes config parser, shadow table management, insert/delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_RESCORE), fuzz targets, and tests.
This commit is contained in:
parent
bf2455f2ba
commit
ba0db0b6d6
19 changed files with 3378 additions and 8 deletions
|
|
@ -21,9 +21,14 @@ BASELINES = \
|
|||
# ANNOY_CONFIGS = \
|
||||
# "annoy-t50:type=annoy,n_trees=50"
|
||||
|
||||
ALL_CONFIGS = $(BASELINES)
|
||||
# Rescore index configurations passed to the benchmark runner.
# Each entry has the form "<label>:type=rescore,<option>=<value>,...".
RESCORE_CONFIGS = \
	"rescore-bit-os8:type=rescore,quantizer=bit,oversample=8" \
	"rescore-bit-os16:type=rescore,quantizer=bit,oversample=16" \
	"rescore-int8-os8:type=rescore,quantizer=int8,oversample=8"
|
||||
|
||||
.PHONY: seed ground-truth bench-smoke bench-10k bench-50k bench-100k bench-all \
|
||||
ALL_CONFIGS = $(BASELINES) $(RESCORE_CONFIGS)
|
||||
|
||||
.PHONY: seed ground-truth bench-smoke bench-rescore bench-10k bench-50k bench-100k bench-all \
|
||||
report clean
|
||||
|
||||
# --- Data preparation ---
|
||||
|
|
@ -40,6 +45,10 @@ bench-smoke: seed
|
|||
$(BENCH) --subset-size 5000 -k 10 -n 20 -o runs/smoke \
|
||||
$(BASELINES)
|
||||
|
||||
# Run the benchmark over the rescore configurations only
# (--subset-size 10000, k=10), writing results under runs/rescore.
bench-rescore: seed
	$(BENCH) --subset-size 10000 -k 10 -o runs/rescore \
		$(RESCORE_CONFIGS)
|
||||
|
||||
# --- Standard sizes ---
|
||||
bench-10k: seed
|
||||
$(BENCH) --subset-size 10000 -k 10 -o runs/10k $(ALL_CONFIGS)
|
||||
|
|
|
|||
|
|
@ -140,6 +140,39 @@ INDEX_REGISTRY["baseline"] = {
|
|||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Rescore implementation
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _rescore_create_table_sql(params):
|
||||
quantizer = params.get("quantizer", "bit")
|
||||
oversample = params.get("oversample", 8)
|
||||
return (
|
||||
f"CREATE VIRTUAL TABLE vec_items USING vec0("
|
||||
f" chunk_size=256,"
|
||||
f" id integer primary key,"
|
||||
f" embedding float[768] distance_metric=cosine"
|
||||
f" indexed by rescore(quantizer={quantizer}, oversample={oversample}))"
|
||||
)
|
||||
|
||||
|
||||
def _rescore_describe(params):
|
||||
q = params.get("quantizer", "bit")
|
||||
os = params.get("oversample", 8)
|
||||
return f"rescore {q} (os={os})"
|
||||
|
||||
|
||||
# Registry entry for the "rescore" index type.
INDEX_REGISTRY["rescore"] = {
    # Same fallbacks that the two helper functions apply to missing params.
    "defaults": {"quantizer": "bit", "oversample": 8},
    "create_table_sql": _rescore_create_table_sql,
    # No custom insert statement or post-insert step is registered;
    # presumably the harness falls back to its defaults — confirm against
    # the code that consumes INDEX_REGISTRY.
    "insert_sql": None,
    "post_insert_hook": None,
    # default MATCH query works — rescore is automatic
    "run_query": None,
    "describe": _rescore_describe,
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Config parsing
|
||||
# ============================================================================
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue