Add comprehensive ANN benchmarking suite (#279)

Extend benchmarks-ann/ with a results database (SQLite, with per-query detail
and continuous writes), dataset subfolder organization, and --subset-size and
--warmup options. This supports systematic comparison across the flat, rescore,
IVF, and DiskANN index types.
This commit is contained in:
Alex Garcia 2026-03-31 01:29:49 -07:00 committed by GitHub
parent a248ecd061
commit 8544081a67
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 2127 additions and 292 deletions

View file

@ -0,0 +1,13 @@
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "model2vec[distill]",
# "torch<=2.7",
# ]
# ///
from model2vec.distill import distill
# One-shot script: distill a source embedding checkpoint with model2vec and
# write the result to a local directory next to where the script is run.
SOURCE_CHECKPOINT = "BAAI/bge-base-en-v1.5"
# Passed straight through as `pca_dims`; presumably the PCA component count
# (i.e. the output width) -- confirm against the model2vec documentation.
PCA_COMPONENTS = 768
OUTPUT_DIRECTORY = "bge-base-en-v1.5-768"

model = distill(
    model_name=SOURCE_CHECKPOINT,
    pca_dims=PCA_COMPONENTS,
)
model.save_pretrained(OUTPUT_DIRECTORY)

# The message spells out the same directory that was written just above.
print("Saved distilled model to bge-base-en-v1.5-768/")