mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 16:56:27 +02:00
84 lines
2.1 KiB
Python
84 lines
2.1 KiB
Python
|
|
import sqlite3
|
||
|
|
import time
|
||
|
|
from random import randrange
|
||
|
|
from statistics import mean
|
||
|
|
|
||
|
|
|
||
|
|
def connect(path):
    """Open the SQLite database at *path* and load the vec0 extension.

    The path is echoed to stdout before the connection is opened. Extension
    loading is enabled only while the two loads below run and is disabled
    again before the connection is handed back to the caller.
    """
    print(path)
    conn = sqlite3.connect(path)

    conn.enable_load_extension(True)
    # The shared library is loaded twice: first without naming an entry
    # point, then through SQL with sqlite3_vec_fs_read_init named explicitly.
    conn.load_extension("../dist/vec0")
    conn.execute("select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')")
    conn.enable_load_extension(False)

    return conn
|
||
|
|
|
||
|
|
|
||
|
|
# Parameter grid for the benchmark. Every (type, page_size, chunk_size)
# combination maps to one database file under dbs/; the query below assumes
# that file already contains a populated vec_items table.
page_sizes = [  # 4096, 8192,
    16384,
    32768,
]
chunk_sizes = [128, 256, 1024, 2048]
types = ["f32", "int8", "bit"]

# Run the element types in reverse order: bit, int8, f32.
types.reverse()

for t in types:
    for page_size in page_sizes:
        for chunk_size in chunk_sizes:
            print(f"page_size={page_size}, chunk_size={chunk_size}")

            # SQL expression that produces the query vector from a stored row;
            # the int8 and bit variants quantize the stored embedding first.
            func = "embedding"
            if t == "int8":
                func = "vec_quantize_i8(embedding, 'unit')"
            if t == "bit":
                func = "vec_quantize_binary(embedding)"

            times = []
            trials = 20
            db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db")

            try:
                for trial in range(trials):
                    # perf_counter is monotonic and high resolution, so the
                    # measured interval cannot be skewed by wall-clock
                    # adjustments the way time.time() can.
                    t0 = time.perf_counter()
                    # fetchall() stays inside the timed region so the cost of
                    # materializing all k rows is included in the measurement.
                    # NOTE(review): randrange(100000) can yield 0 and never
                    # yields 100000 - confirm this matches the rowid range
                    # actually present in vec_items.
                    db.execute(
                        f"""
                          select rowid
                          from vec_items
                          where embedding match (select {func} from vec_items where rowid = ?)
                            and k = 10
                          order by distance
                        """,
                        [randrange(100000)],
                    ).fetchall()

                    times.append(time.perf_counter() - t0)
            finally:
                # Release the connection before moving on to the next
                # database; otherwise one handle per grid cell stays open for
                # the rest of the run.
                db.close()
            print(mean(times))
|
||
|
|
|
||
|
|
"""

page_size=4096, chunk_size=256
0.2635102152824402
page_size=4096, chunk_size=1024
0.2609449863433838
page_size=4096, chunk_size=2048
0.275589919090271
page_size=8192, chunk_size=256
0.18621582984924318
page_size=8192, chunk_size=1024
0.20939643383026124
page_size=8192, chunk_size=2048
0.22376316785812378
page_size=16384, chunk_size=256
0.16012665033340454
page_size=16384, chunk_size=1024
0.18346318006515502
page_size=16384, chunk_size=2048
0.18224761486053467
page_size=32768, chunk_size=256
0.14202518463134767
page_size=32768, chunk_size=1024
0.15340715646743774
page_size=32768, chunk_size=2048
0.18018823862075806
"""
|