sqlite-vec/tests/test-insert-delete.py

150 lines
5.1 KiB
Python
Raw Normal View History

import sqlite3
import struct
import pytest
from helpers import _f32, exec
def test_insert_creates_chunks_and_vectors(db, snapshot):
    """Insert five float[4] vectors by explicit rowid and read each one back.

    Row counts of the ``v_rowids`` and ``v_vector_chunks00`` tables are
    compared against the named snapshots ``rowids_count`` and
    ``vector_chunks_count``.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # Keyed by the rowid each vector is stored under; dict order is insert order.
    vectors_by_rowid = {
        1: [1.0, 2.0, 3.0, 4.0],
        2: [5.0, 6.0, 7.0, 8.0],
        3: [0.1, 0.2, 0.3, 0.4],
        4: [10.0, 20.0, 30.0, 40.0],
        5: [0.5, 0.5, 0.5, 0.5],
    }
    for rowid, vector in vectors_by_rowid.items():
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, _f32(vector)])

    rowids_count = exec(db, "select count(*) as cnt from v_rowids")
    assert rowids_count == snapshot(name="rowids_count")

    chunks_count = exec(db, "select count(*) as cnt from v_vector_chunks00")
    assert chunks_count == snapshot(name="vector_chunks_count")

    # Round-trip: each stored value must equal `_f32` of the inserted vector.
    for rowid, vector in vectors_by_rowid.items():
        matches = db.execute("select emb from v where rowid = ?", [rowid]).fetchall()
        assert len(matches) == 1
        assert matches[0][0] == _f32(vector)
def test_insert_auto_rowid(db):
    """Insert three vectors without a rowid and expect rowids 1, 2, 3.

    Each vector is then fetched by its expected rowid and compared with the
    value that was inserted.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    unit_vectors = [
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
    for vector in unit_vectors:
        db.execute("insert into v(emb) values (?)", [_f32(vector)])

    assigned = [
        record[0]
        for record in db.execute("select rowid from v order by rowid").fetchall()
    ]
    assert assigned == [1, 2, 3]

    # Rowids are expected to follow insertion order, starting at 1.
    for rowid, vector in zip(range(1, len(unit_vectors) + 1), unit_vectors):
        stored = db.execute("select emb from v where rowid = ?", [rowid]).fetchone()[0]
        assert stored == _f32(vector)
def test_insert_text_primary_key(db, snapshot):
    """Insert two rows keyed by a text primary key and look them up by id.

    The contents of ``v_rowids`` (rowid, id, chunk_id, chunk_offset) are
    compared against the ``rowids`` snapshot.
    """
    db.execute(
        "create virtual table v using vec0(id text primary key, emb float[4], chunk_size=8)"
    )

    doc_a_vector = [1.0, 2.0, 3.0, 4.0]
    doc_b_vector = [5.0, 6.0, 7.0, 8.0]
    db.execute("insert into v(id, emb) values ('doc_a', ?)", [_f32(doc_a_vector)])
    db.execute("insert into v(id, emb) values ('doc_b', ?)", [_f32(doc_b_vector)])

    rowids_dump = exec(
        db, "select rowid, id, chunk_id, chunk_offset from v_rowids order by rowid"
    )
    assert rowids_dump == snapshot(name="rowids")

    # Each lookup query is paired with the vector stored under that id.
    lookups = (
        ("select emb from v where id = 'doc_a'", doc_a_vector),
        ("select emb from v where id = 'doc_b'", doc_b_vector),
    )
    for query, vector in lookups:
        stored = db.execute(query).fetchone()
        assert stored[0] == _f32(vector)
def test_delete_clears_validity(db):
    """Delete one of three rows and confirm it is gone from ``v`` and ``v_rowids``.

    Also checks that a further insert after the delete succeeds and that the
    new vector reads back unchanged.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    basis = [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]]
    for rowid, vector in zip((1, 2, 3), basis):
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, _f32(vector)])

    db.execute("delete from v where rowid = 2")

    visible = [
        record[0]
        for record in db.execute("select rowid from v order by rowid").fetchall()
    ]
    assert visible == [1, 3]

    tracked = [
        record[0]
        for record in db.execute("select rowid from v_rowids order by rowid").fetchall()
    ]
    assert 2 not in tracked

    # Inserting a new vector after the deletion must still work.
    replacement = [0.0, 0.0, 0.0, 1.0]
    db.execute("insert into v(rowid, emb) values (4, ?)", [_f32(replacement)])
    stored = db.execute("select emb from v where rowid = 4").fetchone()
    assert stored[0] == _f32(replacement)
def test_insert_delete_reinsert(db):
    """Insert rowid 1, delete it, insert rowid 2, and expect only rowid 2 to remain.

    A ``k = 1`` KNN query using the second vector must return exactly one row,
    and that row must be rowid 2.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    ones = [1.0, 1.0, 1.0, 1.0]
    twos = [2.0, 2.0, 2.0, 2.0]

    db.execute("insert into v(rowid, emb) values (1, ?)", [_f32(ones)])
    db.execute("delete from v where rowid = 1")
    db.execute("insert into v(rowid, emb) values (2, ?)", [_f32(twos)])

    surviving = [
        record[0]
        for record in db.execute("select rowid from v order by rowid").fetchall()
    ]
    assert surviving == [2]

    # The KNN query must work after the delete and return rowid 2.
    nearest = db.execute(
        "select rowid, distance from v where emb match ? and k = 1",
        [_f32(twos)],
    ).fetchall()
    assert len(nearest) == 1
    assert nearest[0][0] == 2
def test_insert_validates_dimensions(db):
    """Reject inserts whose vector is shorter or longer than the declared float[4].

    Each bad insert must report an ``OperationalError`` whose message contains
    "Dimension mismatch", "Expected 4", and the number of values supplied.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # (vector with the wrong length, its length as it should appear in the message)
    wrong_sized = (
        ([1.0, 2.0, 3.0], "3"),
        ([1.0, 2.0, 3.0, 4.0, 5.0], "5"),
    )
    for vector, received in wrong_sized:
        outcome = exec(db, "insert into v(rowid, emb) values (1, ?)", [_f32(vector)])
        assert outcome["error"] == "OperationalError"
        message = outcome["message"]
        assert "Dimension mismatch" in message
        assert "Expected 4" in message
        assert received in message
def test_insert_validates_type(db):
    """Reject a ``vec_int8`` value inserted into a float[4] column.

    The result must carry an error, and its message must mention both
    ``float32`` and ``int8``.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # Four signed bytes, wrapped by vec_int8() in the SQL below.
    int8_blob = struct.pack("4b", 1, 2, 3, 4)
    insert_sql = "insert into v(rowid, emb) values (1, vec_int8(?))"

    outcome = exec(db, insert_sql, [int8_blob])
    assert "error" in outcome

    message = outcome["message"]
    assert "float32" in message
    assert "int8" in message
def test_info_table_contents(db, snapshot):
    """Snapshot the key/value rows of ``v_info`` right after the table is created."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    info_rows = exec(db, "select key, value from v_info order by key")
    assert info_rows == snapshot()