mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Add Python regression tests for existing insert/delete paths
Baseline tests protecting non-DiskANN chunk-based insert and delete behavior: vector round-trips, auto rowids, text primary keys, delete validity, reinsert after delete, dimension/type validation, and v_info snapshot. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0bca960e9d
commit
206fbc2bdd
2 changed files with 236 additions and 0 deletions
63
tests/__snapshots__/test-insert-delete.ambr
Normal file
63
tests/__snapshots__/test-insert-delete.ambr
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
# serializer version: 1
|
||||||
|
# name: test_info_table_contents
|
||||||
|
OrderedDict({
|
||||||
|
'sql': 'select key, value from v_info order by key',
|
||||||
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'key': 'CREATE_VERSION',
|
||||||
|
'value': 'v0.1.7-alpha.10',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'key': 'CREATE_VERSION_MAJOR',
|
||||||
|
'value': 0,
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'key': 'CREATE_VERSION_MINOR',
|
||||||
|
'value': 1,
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'key': 'CREATE_VERSION_PATCH',
|
||||||
|
'value': 7,
|
||||||
|
}),
|
||||||
|
]),
|
||||||
|
})
|
||||||
|
# ---
|
||||||
|
# name: test_insert_creates_chunks_and_vectors[rowids_count]
|
||||||
|
OrderedDict({
|
||||||
|
'sql': 'select count(*) as cnt from v_rowids',
|
||||||
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'cnt': 5,
|
||||||
|
}),
|
||||||
|
]),
|
||||||
|
})
|
||||||
|
# ---
|
||||||
|
# name: test_insert_creates_chunks_and_vectors[vector_chunks_count]
|
||||||
|
OrderedDict({
|
||||||
|
'sql': 'select count(*) as cnt from v_vector_chunks00',
|
||||||
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'cnt': 1,
|
||||||
|
}),
|
||||||
|
]),
|
||||||
|
})
|
||||||
|
# ---
|
||||||
|
# name: test_insert_text_primary_key[rowids]
|
||||||
|
OrderedDict({
|
||||||
|
'sql': 'select rowid, id, chunk_id, chunk_offset from v_rowids order by rowid',
|
||||||
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 1,
|
||||||
|
'id': 'doc_a',
|
||||||
|
'chunk_id': 1,
|
||||||
|
'chunk_offset': 0,
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 2,
|
||||||
|
'id': 'doc_b',
|
||||||
|
'chunk_id': 1,
|
||||||
|
'chunk_offset': 1,
|
||||||
|
}),
|
||||||
|
]),
|
||||||
|
})
|
||||||
|
# ---
|
||||||
173
tests/test-insert-delete.py
Normal file
173
tests/test-insert-delete.py
Normal file
|
|
@ -0,0 +1,173 @@
|
||||||
|
import sqlite3
|
||||||
|
import struct
|
||||||
|
from collections import OrderedDict
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _f32(list):
|
||||||
|
return struct.pack("%sf" % len(list), *list)
|
||||||
|
|
||||||
|
|
||||||
|
def exec(db, sql, parameters=[]):
|
||||||
|
try:
|
||||||
|
rows = db.execute(sql, parameters).fetchall()
|
||||||
|
except (sqlite3.OperationalError, sqlite3.DatabaseError) as e:
|
||||||
|
return {
|
||||||
|
"error": e.__class__.__name__,
|
||||||
|
"message": str(e),
|
||||||
|
}
|
||||||
|
a = []
|
||||||
|
for row in rows:
|
||||||
|
o = OrderedDict()
|
||||||
|
for k in row.keys():
|
||||||
|
o[k] = row[k]
|
||||||
|
a.append(o)
|
||||||
|
result = OrderedDict()
|
||||||
|
result["sql"] = sql
|
||||||
|
result["rows"] = a
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_creates_chunks_and_vectors(db, snapshot):
|
||||||
|
db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
|
||||||
|
|
||||||
|
vecs = [
|
||||||
|
[1.0, 2.0, 3.0, 4.0],
|
||||||
|
[5.0, 6.0, 7.0, 8.0],
|
||||||
|
[0.1, 0.2, 0.3, 0.4],
|
||||||
|
[10.0, 20.0, 30.0, 40.0],
|
||||||
|
[0.5, 0.5, 0.5, 0.5],
|
||||||
|
]
|
||||||
|
for i, v in enumerate(vecs, start=1):
|
||||||
|
db.execute("insert into v(rowid, emb) values (?, ?)", [i, _f32(v)])
|
||||||
|
|
||||||
|
assert exec(db, "select count(*) as cnt from v_rowids") == snapshot(
|
||||||
|
name="rowids_count"
|
||||||
|
)
|
||||||
|
assert exec(db, "select count(*) as cnt from v_vector_chunks00") == snapshot(
|
||||||
|
name="vector_chunks_count"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify round-trip: each inserted vector comes back identical
|
||||||
|
for i, v in enumerate(vecs, start=1):
|
||||||
|
rows = db.execute("select emb from v where rowid = ?", [i]).fetchall()
|
||||||
|
assert len(rows) == 1
|
||||||
|
assert rows[0][0] == _f32(v)
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_auto_rowid(db):
|
||||||
|
db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
|
||||||
|
|
||||||
|
vecs = [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]]
|
||||||
|
for v in vecs:
|
||||||
|
db.execute("insert into v(emb) values (?)", [_f32(v)])
|
||||||
|
|
||||||
|
rows = db.execute("select rowid from v order by rowid").fetchall()
|
||||||
|
rowids = [r[0] for r in rows]
|
||||||
|
assert rowids == [1, 2, 3]
|
||||||
|
|
||||||
|
for i, v in enumerate(vecs, start=1):
|
||||||
|
row = db.execute("select emb from v where rowid = ?", [i]).fetchone()
|
||||||
|
assert row[0] == _f32(v)
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_text_primary_key(db, snapshot):
|
||||||
|
db.execute(
|
||||||
|
"create virtual table v using vec0(id text primary key, emb float[4], chunk_size=8)"
|
||||||
|
)
|
||||||
|
|
||||||
|
db.execute(
|
||||||
|
"insert into v(id, emb) values ('doc_a', ?)", [_f32([1.0, 2.0, 3.0, 4.0])]
|
||||||
|
)
|
||||||
|
db.execute(
|
||||||
|
"insert into v(id, emb) values ('doc_b', ?)", [_f32([5.0, 6.0, 7.0, 8.0])]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert exec(db, "select rowid, id, chunk_id, chunk_offset from v_rowids order by rowid") == snapshot(
|
||||||
|
name="rowids"
|
||||||
|
)
|
||||||
|
|
||||||
|
row = db.execute("select emb from v where id = 'doc_a'").fetchone()
|
||||||
|
assert row[0] == _f32([1.0, 2.0, 3.0, 4.0])
|
||||||
|
|
||||||
|
row = db.execute("select emb from v where id = 'doc_b'").fetchone()
|
||||||
|
assert row[0] == _f32([5.0, 6.0, 7.0, 8.0])
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_clears_validity(db):
|
||||||
|
db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
|
||||||
|
|
||||||
|
for i, v in enumerate(
|
||||||
|
[[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]],
|
||||||
|
start=1,
|
||||||
|
):
|
||||||
|
db.execute("insert into v(rowid, emb) values (?, ?)", [i, _f32(v)])
|
||||||
|
|
||||||
|
db.execute("delete from v where rowid = 2")
|
||||||
|
|
||||||
|
rows = db.execute("select rowid from v order by rowid").fetchall()
|
||||||
|
assert [r[0] for r in rows] == [1, 3]
|
||||||
|
|
||||||
|
rowid_rows = db.execute("select rowid from v_rowids order by rowid").fetchall()
|
||||||
|
assert 2 not in [r[0] for r in rowid_rows]
|
||||||
|
|
||||||
|
# Inserting a new vector after deletion still works
|
||||||
|
db.execute("insert into v(rowid, emb) values (4, ?)", [_f32([0.0, 0.0, 0.0, 1.0])])
|
||||||
|
row = db.execute("select emb from v where rowid = 4").fetchone()
|
||||||
|
assert row[0] == _f32([0.0, 0.0, 0.0, 1.0])
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_delete_reinsert(db):
|
||||||
|
db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
|
||||||
|
|
||||||
|
db.execute("insert into v(rowid, emb) values (1, ?)", [_f32([1.0, 1.0, 1.0, 1.0])])
|
||||||
|
db.execute("delete from v where rowid = 1")
|
||||||
|
db.execute("insert into v(rowid, emb) values (2, ?)", [_f32([2.0, 2.0, 2.0, 2.0])])
|
||||||
|
|
||||||
|
rows = db.execute("select rowid from v order by rowid").fetchall()
|
||||||
|
assert [r[0] for r in rows] == [2]
|
||||||
|
|
||||||
|
# KNN query works and returns rowid 2
|
||||||
|
knn = db.execute(
|
||||||
|
"select rowid, distance from v where emb match ? and k = 1",
|
||||||
|
[_f32([2.0, 2.0, 2.0, 2.0])],
|
||||||
|
).fetchall()
|
||||||
|
assert len(knn) == 1
|
||||||
|
assert knn[0][0] == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_validates_dimensions(db):
|
||||||
|
db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
|
||||||
|
|
||||||
|
result = exec(db, "insert into v(rowid, emb) values (1, ?)", [_f32([1.0, 2.0, 3.0])])
|
||||||
|
assert result["error"] == "OperationalError"
|
||||||
|
assert "Dimension mismatch" in result["message"]
|
||||||
|
assert "Expected 4" in result["message"]
|
||||||
|
assert "3" in result["message"]
|
||||||
|
|
||||||
|
result = exec(
|
||||||
|
db, "insert into v(rowid, emb) values (1, ?)", [_f32([1.0, 2.0, 3.0, 4.0, 5.0])]
|
||||||
|
)
|
||||||
|
assert result["error"] == "OperationalError"
|
||||||
|
assert "Dimension mismatch" in result["message"]
|
||||||
|
assert "Expected 4" in result["message"]
|
||||||
|
assert "5" in result["message"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_validates_type(db):
|
||||||
|
db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
|
||||||
|
|
||||||
|
int8_vec = struct.pack("4b", 1, 2, 3, 4)
|
||||||
|
result = exec(
|
||||||
|
db,
|
||||||
|
"insert into v(rowid, emb) values (1, vec_int8(?))",
|
||||||
|
[int8_vec],
|
||||||
|
)
|
||||||
|
assert "error" in result
|
||||||
|
assert "float32" in result["message"]
|
||||||
|
assert "int8" in result["message"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_info_table_contents(db, snapshot):
|
||||||
|
db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
|
||||||
|
assert exec(db, "select key, value from v_info order by key") == snapshot()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue