diff --git a/tests/__snapshots__/test-insert-delete.ambr b/tests/__snapshots__/test-insert-delete.ambr
new file mode 100644
index 0000000..8e2e848
--- /dev/null
+++ b/tests/__snapshots__/test-insert-delete.ambr
@@ -0,0 +1,63 @@
+# serializer version: 1
+# name: test_info_table_contents
+  OrderedDict({
+    'sql': 'select key, value from v_info order by key',
+    'rows': list([
+      OrderedDict({
+        'key': 'CREATE_VERSION',
+        'value': 'v0.1.7-alpha.10',
+      }),
+      OrderedDict({
+        'key': 'CREATE_VERSION_MAJOR',
+        'value': 0,
+      }),
+      OrderedDict({
+        'key': 'CREATE_VERSION_MINOR',
+        'value': 1,
+      }),
+      OrderedDict({
+        'key': 'CREATE_VERSION_PATCH',
+        'value': 7,
+      }),
+    ]),
+  })
+# ---
+# name: test_insert_creates_chunks_and_vectors[rowids_count]
+  OrderedDict({
+    'sql': 'select count(*) as cnt from v_rowids',
+    'rows': list([
+      OrderedDict({
+        'cnt': 5,
+      }),
+    ]),
+  })
+# ---
+# name: test_insert_creates_chunks_and_vectors[vector_chunks_count]
+  OrderedDict({
+    'sql': 'select count(*) as cnt from v_vector_chunks00',
+    'rows': list([
+      OrderedDict({
+        'cnt': 1,
+      }),
+    ]),
+  })
+# ---
+# name: test_insert_text_primary_key[rowids]
+  OrderedDict({
+    'sql': 'select rowid, id, chunk_id, chunk_offset from v_rowids order by rowid',
+    'rows': list([
+      OrderedDict({
+        'rowid': 1,
+        'id': 'doc_a',
+        'chunk_id': 1,
+        'chunk_offset': 0,
+      }),
+      OrderedDict({
+        'rowid': 2,
+        'id': 'doc_b',
+        'chunk_id': 1,
+        'chunk_offset': 1,
+      }),
+    ]),
+  })
+# ---
diff --git a/tests/test-insert-delete.py b/tests/test-insert-delete.py
new file mode 100644
index 0000000..c459954
--- /dev/null
+++ b/tests/test-insert-delete.py
@@ -0,0 +1,189 @@
+import sqlite3
+import struct
+from collections import OrderedDict
+import pytest
+
+
+def _f32(list):
+    """Pack a sequence of numbers into a float32 blob (native byte order)."""
+    return struct.pack("%sf" % len(list), *list)
+
+
+def exec(db, sql, parameters=()):
+    """Run ``sql`` on ``db`` and return the outcome as plain, comparable data.
+
+    On success the result is an ``OrderedDict`` with the statement under
+    ``"sql"`` and the fetched rows under ``"rows"``, each row converted to an
+    ``OrderedDict`` keyed by column name. If SQLite raises a
+    ``DatabaseError`` (``OperationalError`` is a subclass), a dict with the
+    exception class name under ``"error"`` and its text under ``"message"``
+    is returned instead of raising.
+
+    Rows must support ``keys()`` and lookup by column name, as
+    ``sqlite3.Row`` does. ``parameters`` defaults to an immutable empty tuple
+    so no mutable default is shared between calls.
+    """
+    try:
+        rows = db.execute(sql, parameters).fetchall()
+    except sqlite3.DatabaseError as e:
+        return {
+            "error": e.__class__.__name__,
+            "message": str(e),
+        }
+    result = OrderedDict()
+    result["sql"] = sql
+    result["rows"] = [OrderedDict((k, row[k]) for k in row.keys()) for row in rows]
+    return result
+
+
+def test_insert_creates_chunks_and_vectors(db, snapshot):
+    """Insert five vectors; snapshot table counts and check each reads back."""
+    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
+
+    vecs = [
+        [1.0, 2.0, 3.0, 4.0],
+        [5.0, 6.0, 7.0, 8.0],
+        [0.1, 0.2, 0.3, 0.4],
+        [10.0, 20.0, 30.0, 40.0],
+        [0.5, 0.5, 0.5, 0.5],
+    ]
+    for i, v in enumerate(vecs, start=1):
+        db.execute("insert into v(rowid, emb) values (?, ?)", [i, _f32(v)])
+
+    assert exec(db, "select count(*) as cnt from v_rowids") == snapshot(
+        name="rowids_count"
+    )
+    assert exec(db, "select count(*) as cnt from v_vector_chunks00") == snapshot(
+        name="vector_chunks_count"
+    )
+
+    # Verify round-trip: each inserted vector comes back identical
+    for i, v in enumerate(vecs, start=1):
+        rows = db.execute("select emb from v where rowid = ?", [i]).fetchall()
+        assert len(rows) == 1
+        assert rows[0][0] == _f32(v)
+
+
+def test_insert_auto_rowid(db):
+    """Rows inserted without a rowid get 1, 2, 3 and read back unchanged."""
+    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
+
+    vecs = [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]]
+    for v in vecs:
+        db.execute("insert into v(emb) values (?)", [_f32(v)])
+
+    rows = db.execute("select rowid from v order by rowid").fetchall()
+    rowids = [r[0] for r in rows]
+    assert rowids == [1, 2, 3]
+
+    for i, v in enumerate(vecs, start=1):
+        row = db.execute("select emb from v where rowid = ?", [i]).fetchone()
+        assert row[0] == _f32(v)
+
+
+def test_insert_text_primary_key(db, snapshot):
+    """Text primary keys are recorded in ``v_rowids`` and can fetch vectors."""
+    db.execute(
+        "create virtual table v using vec0(id text primary key, emb float[4], chunk_size=8)"
+    )
+
+    db.execute(
+        "insert into v(id, emb) values ('doc_a', ?)", [_f32([1.0, 2.0, 3.0, 4.0])]
+    )
+    db.execute(
+        "insert into v(id, emb) values ('doc_b', ?)", [_f32([5.0, 6.0, 7.0, 8.0])]
+    )
+
+    assert exec(
+        db, "select rowid, id, chunk_id, chunk_offset from v_rowids order by rowid"
+    ) == snapshot(name="rowids")
+
+    row = db.execute("select emb from v where id = 'doc_a'").fetchone()
+    assert row[0] == _f32([1.0, 2.0, 3.0, 4.0])
+
+    row = db.execute("select emb from v where id = 'doc_b'").fetchone()
+    assert row[0] == _f32([5.0, 6.0, 7.0, 8.0])
+
+
+def test_delete_clears_validity(db):
+    """Deleting rowid 2 removes it from ``v`` and ``v_rowids``; inserts still work."""
+    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
+
+    for i, v in enumerate(
+        [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]],
+        start=1,
+    ):
+        db.execute("insert into v(rowid, emb) values (?, ?)", [i, _f32(v)])
+
+    db.execute("delete from v where rowid = 2")
+
+    rows = db.execute("select rowid from v order by rowid").fetchall()
+    assert [r[0] for r in rows] == [1, 3]
+
+    rowid_rows = db.execute("select rowid from v_rowids order by rowid").fetchall()
+    assert 2 not in [r[0] for r in rowid_rows]
+
+    # Inserting a new vector after deletion still works
+    db.execute("insert into v(rowid, emb) values (4, ?)", [_f32([0.0, 0.0, 0.0, 1.0])])
+    row = db.execute("select emb from v where rowid = 4").fetchone()
+    assert row[0] == _f32([0.0, 0.0, 0.0, 1.0])
+
+
+def test_insert_delete_reinsert(db):
+    """After delete and reinsert, scans and KNN return only rowid 2."""
+    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
+
+    db.execute("insert into v(rowid, emb) values (1, ?)", [_f32([1.0, 1.0, 1.0, 1.0])])
+    db.execute("delete from v where rowid = 1")
+    db.execute("insert into v(rowid, emb) values (2, ?)", [_f32([2.0, 2.0, 2.0, 2.0])])
+
+    rows = db.execute("select rowid from v order by rowid").fetchall()
+    assert [r[0] for r in rows] == [2]
+
+    # KNN query works and returns rowid 2
+    knn = db.execute(
+        "select rowid, distance from v where emb match ? and k = 1",
+        [_f32([2.0, 2.0, 2.0, 2.0])],
+    ).fetchall()
+    assert len(knn) == 1
+    assert knn[0][0] == 2
+
+
+def test_insert_validates_dimensions(db):
+    """Vectors of 3 or 5 floats are rejected by a ``float[4]`` column."""
+    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
+
+    result = exec(db, "insert into v(rowid, emb) values (1, ?)", [_f32([1.0, 2.0, 3.0])])
+    assert result["error"] == "OperationalError"
+    assert "Dimension mismatch" in result["message"]
+    assert "Expected 4" in result["message"]
+    assert "3" in result["message"]
+
+    result = exec(
+        db, "insert into v(rowid, emb) values (1, ?)", [_f32([1.0, 2.0, 3.0, 4.0, 5.0])]
+    )
+    assert result["error"] == "OperationalError"
+    assert "Dimension mismatch" in result["message"]
+    assert "Expected 4" in result["message"]
+    assert "5" in result["message"]
+
+
+def test_insert_validates_type(db):
+    """An int8 vector sent to a ``float[4]`` column is rejected, naming both types."""
+    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
+
+    int8_vec = struct.pack("4b", 1, 2, 3, 4)
+    result = exec(
+        db,
+        "insert into v(rowid, emb) values (1, vec_int8(?))",
+        [int8_vec],
+    )
+    assert "error" in result
+    assert "float32" in result["message"]
+    assert "int8" in result["message"]
+
+
+def test_info_table_contents(db, snapshot):
+    """Snapshot the key/value rows of ``v_info`` for a newly created table."""
+    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
+    assert exec(db, "select key, value from v_info order by key") == snapshot()