sqlite-vec/benchmarks/self-params/knn.py

84 lines
2.1 KiB
Python
Raw Normal View History

2024-04-20 13:38:58 -07:00
import sqlite3
import time
from random import randrange
from statistics import mean
def connect(path):
print(path)
db = sqlite3.connect(path)
db.enable_load_extension(True)
db.load_extension("../dist/vec0")
db.execute("select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')")
db.enable_load_extension(False)
return db
page_sizes = [ # 4096, 8192,
16384,
32768,
]
chunk_sizes = [128, 256, 1024, 2048]
types = ["f32", "int8", "bit"]
types.reverse()
for t in types:
for page_size in page_sizes:
for chunk_size in chunk_sizes:
print(f"page_size={page_size}, chunk_size={chunk_size}")
func = "embedding"
if t == "int8":
func = "vec_quantize_i8(embedding, 'unit')"
if t == "bit":
func = "vec_quantize_binary(embedding)"
times = []
trials = 20
db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db")
for trial in range(trials):
t0 = time.time()
results = db.execute(
f"""
select rowid
from vec_items
where embedding match (select {func} from vec_items where rowid = ?)
and k = 10
order by distance
""",
[randrange(100000)],
).fetchall()
times.append(time.time() - t0)
print(mean(times))
"""
page_size=4096, chunk_size=256
0.2635102152824402
page_size=4096, chunk_size=1024
0.2609449863433838
page_size=4096, chunk_size=2048
0.275589919090271
page_size=8192, chunk_size=256
0.18621582984924318
page_size=8192, chunk_size=1024
0.20939643383026124
page_size=8192, chunk_size=2048
0.22376316785812378
page_size=16384, chunk_size=256
0.16012665033340454
page_size=16384, chunk_size=1024
0.18346318006515502
page_size=16384, chunk_size=2048
0.18224761486053467
page_size=32768, chunk_size=256
0.14202518463134767
page_size=32768, chunk_size=1024
0.15340715646743774
page_size=32768, chunk_size=2048
0.18018823862075806
"""