Add comprehensive ANN benchmarking suite (#279)

Extend benchmarks-ann/ with a results database (SQLite, with per-query detail and continuous writes), per-dataset subfolder organization, and new --subset-size and --warmup options. This supports systematic comparison across the flat, rescore, IVF, and DiskANN index types.
2026-04-25 16:56:27 +02:00 · 2026-03-31 01:29:49 -07:00 · 2026-03-31 01:29:49 -07:00 · 8544081a67
commit 8544081a67
parent a248ecd061
26 changed files with 2127 additions and 292 deletions
--- a/benchmarks-ann/datasets/nyt-768/distill-model.py
+++ b/benchmarks-ann/datasets/nyt-768/distill-model.py
@@ -0,0 +1,13 @@
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "model2vec[distill]",
+#     "torch<=2.7",
+# ]
+# ///
+
+from model2vec.distill import distill
+# Distill BAAI/bge-base-en-v1.5 with pca_dims=768 and save the result to bge-base-en-v1.5-768/.
+model = distill(model_name="BAAI/bge-base-en-v1.5", pca_dims=768)
+model.save_pretrained("bge-base-en-v1.5-768")
+print("Saved distilled model to bge-base-en-v1.5-768/")