mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Initial commit
This commit is contained in:
commit
4c8ad629e0
28 changed files with 6758 additions and 0 deletions
85
benchmarks/self-params/build.py
Normal file
85
benchmarks/self-params/build.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import sqlite3
|
||||
import time
|
||||
|
||||
|
||||
def connect(path):
|
||||
db = sqlite3.connect(path)
|
||||
db.enable_load_extension(True)
|
||||
db.load_extension("../dist/vec0")
|
||||
db.execute("select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')")
|
||||
db.enable_load_extension(False)
|
||||
return db
|
||||
|
||||
|
||||
page_sizes = [ # 4096, 8192,
|
||||
16384,
|
||||
32768,
|
||||
]
|
||||
chunk_sizes = [128, 256, 1024, 2048]
|
||||
types = ["f32", "int8", "bit"]
|
||||
|
||||
SRC = "../examples/dbpedia-openai/data/vectors.npy"
|
||||
|
||||
for page_size in page_sizes:
|
||||
for chunk_size in chunk_sizes:
|
||||
for t in types:
|
||||
print(f"{t} page_size={page_size}, chunk_size={chunk_size}")
|
||||
|
||||
t0 = time.time()
|
||||
db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db")
|
||||
db.execute(f"pragma page_size = {page_size}")
|
||||
with db:
|
||||
db.execute(
|
||||
f"""
|
||||
create virtual table vec_items using vec0(
|
||||
embedding {t}[1536],
|
||||
chunk_size={chunk_size}
|
||||
)
|
||||
"""
|
||||
)
|
||||
func = "vector"
|
||||
if t == "int8":
|
||||
func = "vec_quantize_i8(vector, 'unit')"
|
||||
if t == "bit":
|
||||
func = "vec_quantize_binary(vector)"
|
||||
db.execute(
|
||||
f"""
|
||||
insert into vec_items
|
||||
select rowid, {func}
|
||||
from vec_npy_each(vec_npy_file(?))
|
||||
limit 100000
|
||||
""",
|
||||
[SRC],
|
||||
)
|
||||
elapsed = time.time() - t0
|
||||
print(elapsed)
|
||||
|
||||
"""
|
||||
|
||||
# for 100_000
|
||||
|
||||
page_size=4096, chunk_size=256
|
||||
3.5894200801849365
|
||||
page_size=4096, chunk_size=1024
|
||||
60.70046401023865
|
||||
page_size=4096, chunk_size=2048
|
||||
201.04426288604736
|
||||
page_size=8192, chunk_size=256
|
||||
7.034514904022217
|
||||
page_size=8192, chunk_size=1024
|
||||
9.983598947525024
|
||||
page_size=8192, chunk_size=2048
|
||||
12.318921089172363
|
||||
page_size=16384, chunk_size=256
|
||||
4.97080397605896
|
||||
page_size=16384, chunk_size=1024
|
||||
6.051195859909058
|
||||
page_size=16384, chunk_size=2048
|
||||
8.492683172225952
|
||||
page_size=32768, chunk_size=256
|
||||
5.906642198562622
|
||||
page_size=32768, chunk_size=1024
|
||||
5.876632213592529
|
||||
page_size=32768, chunk_size=2048
|
||||
5.420510292053223
|
||||
"""
|
||||
83
benchmarks/self-params/knn.py
Normal file
83
benchmarks/self-params/knn.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import sqlite3
|
||||
import time
|
||||
from random import randrange
|
||||
from statistics import mean
|
||||
|
||||
|
||||
def connect(path):
|
||||
print(path)
|
||||
db = sqlite3.connect(path)
|
||||
db.enable_load_extension(True)
|
||||
db.load_extension("../dist/vec0")
|
||||
db.execute("select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')")
|
||||
db.enable_load_extension(False)
|
||||
return db
|
||||
|
||||
|
||||
page_sizes = [ # 4096, 8192,
|
||||
16384,
|
||||
32768,
|
||||
]
|
||||
chunk_sizes = [128, 256, 1024, 2048]
|
||||
types = ["f32", "int8", "bit"]
|
||||
|
||||
types.reverse()
|
||||
|
||||
for t in types:
|
||||
for page_size in page_sizes:
|
||||
for chunk_size in chunk_sizes:
|
||||
print(f"page_size={page_size}, chunk_size={chunk_size}")
|
||||
|
||||
func = "embedding"
|
||||
if t == "int8":
|
||||
func = "vec_quantize_i8(embedding, 'unit')"
|
||||
if t == "bit":
|
||||
func = "vec_quantize_binary(embedding)"
|
||||
|
||||
times = []
|
||||
trials = 20
|
||||
db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db")
|
||||
|
||||
for trial in range(trials):
|
||||
t0 = time.time()
|
||||
results = db.execute(
|
||||
f"""
|
||||
select rowid
|
||||
from vec_items
|
||||
where embedding match (select {func} from vec_items where rowid = ?)
|
||||
and k = 10
|
||||
order by distance
|
||||
""",
|
||||
[randrange(100000)],
|
||||
).fetchall()
|
||||
|
||||
times.append(time.time() - t0)
|
||||
print(mean(times))
|
||||
|
||||
"""
|
||||
|
||||
page_size=4096, chunk_size=256
|
||||
0.2635102152824402
|
||||
page_size=4096, chunk_size=1024
|
||||
0.2609449863433838
|
||||
page_size=4096, chunk_size=2048
|
||||
0.275589919090271
|
||||
page_size=8192, chunk_size=256
|
||||
0.18621582984924318
|
||||
page_size=8192, chunk_size=1024
|
||||
0.20939643383026124
|
||||
page_size=8192, chunk_size=2048
|
||||
0.22376316785812378
|
||||
page_size=16384, chunk_size=256
|
||||
0.16012665033340454
|
||||
page_size=16384, chunk_size=1024
|
||||
0.18346318006515502
|
||||
page_size=16384, chunk_size=2048
|
||||
0.18224761486053467
|
||||
page_size=32768, chunk_size=256
|
||||
0.14202518463134767
|
||||
page_size=32768, chunk_size=1024
|
||||
0.15340715646743774
|
||||
page_size=32768, chunk_size=2048
|
||||
0.18018823862075806
|
||||
"""
|
||||
24
benchmarks/self-params/test.py
Normal file
24
benchmarks/self-params/test.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import sqlite3
|
||||
import time
|
||||
|
||||
|
||||
def connect(path):
|
||||
db = sqlite3.connect(path)
|
||||
db.enable_load_extension(True)
|
||||
db.load_extension("../dist/vec0")
|
||||
db.execute("select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')")
|
||||
db.enable_load_extension(False)
|
||||
return db
|
||||
|
||||
|
||||
page_sizes = [4096, 8192, 16384, 32768]
|
||||
chunk_sizes = [256, 1024, 2048]
|
||||
|
||||
for page_size in page_sizes:
|
||||
for chunk_size in chunk_sizes:
|
||||
print(f"page_size={page_size}, chunk_size={chunk_size}")
|
||||
|
||||
t0 = time.time()
|
||||
db = connect(f"dbs/test.{page_size}.{chunk_size}.db")
|
||||
print(db.execute("pragma page_size").fetchone()[0])
|
||||
print(db.execute("select count(*) from vec_items_rowids").fetchone()[0])
|
||||
Loading…
Add table
Add a link
Reference in a new issue