mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 16:56:27 +02:00
Deduplicates exec(), vec0_shadow_table_contents(), _f32(), _i64(), and _int8() helpers that were copied across six test files. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
149 lines
5.1 KiB
Python
149 lines
5.1 KiB
Python
import sqlite3
|
|
import struct
|
|
import pytest
|
|
from helpers import _f32, exec
|
|
|
|
|
|
def test_insert_creates_chunks_and_vectors(db, snapshot):
    """Insert five vectors with explicit rowids and inspect the shadow tables.

    The row counts of ``v_rowids`` and ``v_vector_chunks00`` are compared
    against named snapshots, then every vector is read back through the
    virtual table and compared with ``_f32`` of the values that were inserted.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # Keyed by the explicit rowid each vector is inserted under (1..5).
    inserted = {
        1: [1.0, 2.0, 3.0, 4.0],
        2: [5.0, 6.0, 7.0, 8.0],
        3: [0.1, 0.2, 0.3, 0.4],
        4: [10.0, 20.0, 30.0, 40.0],
        5: [0.5, 0.5, 0.5, 0.5],
    }
    for rowid, vec in inserted.items():
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, _f32(vec)])

    rowids_count = exec(db, "select count(*) as cnt from v_rowids")
    assert rowids_count == snapshot(name="rowids_count")

    chunks_count = exec(db, "select count(*) as cnt from v_vector_chunks00")
    assert chunks_count == snapshot(name="vector_chunks_count")

    # Round-trip: exactly one row per rowid, holding the vector we stored.
    for rowid, vec in inserted.items():
        fetched = db.execute("select emb from v where rowid = ?", [rowid]).fetchall()
        assert len(fetched) == 1
        assert fetched[0][0] == _f32(vec)
|
|
|
|
|
|
def test_insert_auto_rowid(db):
    """Insert vectors without a rowid and expect rowids 1, 2, 3 to be assigned.

    Each vector is then fetched by its expected rowid and compared with the
    value that was inserted in that position.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    unit_vectors = [
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
    for vec in unit_vectors:
        db.execute("insert into v(emb) values (?)", [_f32(vec)])

    assigned = [
        record[0]
        for record in db.execute("select rowid from v order by rowid").fetchall()
    ]
    assert assigned == [1, 2, 3]

    # Insertion order must match rowid order: the n-th vector lives at rowid n.
    for expected_rowid, vec in enumerate(unit_vectors, start=1):
        stored = db.execute(
            "select emb from v where rowid = ?", [expected_rowid]
        ).fetchone()
        assert stored[0] == _f32(vec)
|
|
|
|
|
|
def test_insert_text_primary_key(db, snapshot):
    """Insert two rows keyed by a text primary key and read them back by id.

    The ``v_rowids`` shadow table (rowid, id, chunk_id, chunk_offset) is
    compared against the ``rowids`` snapshot before the vectors are fetched
    through the ``id`` column.
    """
    db.execute(
        "create virtual table v using vec0(id text primary key, emb float[4], chunk_size=8)"
    )

    vec_a = [1.0, 2.0, 3.0, 4.0]
    vec_b = [5.0, 6.0, 7.0, 8.0]
    db.execute("insert into v(id, emb) values ('doc_a', ?)", [_f32(vec_a)])
    db.execute("insert into v(id, emb) values ('doc_b', ?)", [_f32(vec_b)])

    rowids_dump = exec(
        db, "select rowid, id, chunk_id, chunk_offset from v_rowids order by rowid"
    )
    assert rowids_dump == snapshot(name="rowids")

    # Each lookup query is paired with the vector expected to come back.
    lookups = (
        ("select emb from v where id = 'doc_a'", vec_a),
        ("select emb from v where id = 'doc_b'", vec_b),
    )
    for query, vec in lookups:
        stored = db.execute(query).fetchone()
        assert stored[0] == _f32(vec)
|
|
|
|
|
|
def test_delete_clears_validity(db):
    """Delete one of three rows and check it is gone from ``v`` and ``v_rowids``.

    The check is made through the rowids visible in the virtual table and in
    the ``v_rowids`` shadow table. Afterwards a new row is inserted to confirm
    the table still accepts writes once a row has been deleted.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    seed_vectors = [
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
    for rowid, vec in enumerate(seed_vectors, start=1):
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, _f32(vec)])

    db.execute("delete from v where rowid = 2")

    visible = [
        record[0]
        for record in db.execute("select rowid from v order by rowid").fetchall()
    ]
    assert visible == [1, 3]

    shadow_rows = db.execute("select rowid from v_rowids order by rowid").fetchall()
    assert all(record[0] != 2 for record in shadow_rows)

    # Inserting a new vector after the deletion must still work.
    replacement = [0.0, 0.0, 0.0, 1.0]
    db.execute("insert into v(rowid, emb) values (4, ?)", [_f32(replacement)])
    stored = db.execute("select emb from v where rowid = 4").fetchone()
    assert stored[0] == _f32(replacement)
|
|
|
|
|
|
def test_insert_delete_reinsert(db):
    """Insert rowid 1, delete it, insert rowid 2, and query the survivor.

    Only rowid 2 may remain in the table, and a k=1 KNN query for its own
    vector must return exactly that row.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    removed_vec = [1.0, 1.0, 1.0, 1.0]
    survivor_vec = [2.0, 2.0, 2.0, 2.0]

    db.execute("insert into v(rowid, emb) values (1, ?)", [_f32(removed_vec)])
    db.execute("delete from v where rowid = 1")
    db.execute("insert into v(rowid, emb) values (2, ?)", [_f32(survivor_vec)])

    remaining = [
        record[0]
        for record in db.execute("select rowid from v order by rowid").fetchall()
    ]
    assert remaining == [2]

    # The KNN query must run and return only rowid 2.
    knn_sql = "select rowid, distance from v where emb match ? and k = 1"
    nearest = db.execute(knn_sql, [_f32(survivor_vec)]).fetchall()
    assert len(nearest) == 1
    assert nearest[0][0] == 2
|
|
|
|
|
|
def test_insert_validates_dimensions(db):
    """Reject vectors whose length differs from the declared ``float[4]``.

    A too-short (3) and a too-long (5) vector are inserted in turn. Each
    attempt must come back from ``exec`` as an ``OperationalError`` whose
    message mentions the mismatch, the expected size, and the received size.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    insert_sql = "insert into v(rowid, emb) values (1, ?)"
    # (vector to insert, received dimension count expected in the message)
    wrong_sized = (
        ([1.0, 2.0, 3.0], "3"),
        ([1.0, 2.0, 3.0, 4.0, 5.0], "5"),
    )
    for vec, received in wrong_sized:
        outcome = exec(db, insert_sql, [_f32(vec)])
        assert outcome["error"] == "OperationalError"
        assert "Dimension mismatch" in outcome["message"]
        assert "Expected 4" in outcome["message"]
        assert received in outcome["message"]
|
|
|
|
|
|
def test_insert_validates_type(db):
    """Reject an int8 vector inserted into a float32 column.

    The result returned by ``exec`` must carry an ``error`` key, and its
    message must name both element types involved.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    components = (1, 2, 3, 4)
    # Four signed bytes; wrapped in vec_int8() on the SQL side.
    packed_int8 = struct.pack(f"{len(components)}b", *components)

    outcome = exec(
        db,
        "insert into v(rowid, emb) values (1, vec_int8(?))",
        [packed_int8],
    )
    assert "error" in outcome
    for type_name in ("float32", "int8"):
        assert type_name in outcome["message"]
|
|
|
|
|
|
def test_info_table_contents(db, snapshot):
    """Snapshot the key/value rows of ``v_info`` for a freshly created table."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    info_rows = exec(db, "select key, value from v_info order by key")
    assert info_rows == snapshot()
|