2026-03-02 18:12:01 -08:00
|
|
|
import sqlite3
|
|
|
|
|
import struct
|
|
|
|
|
import pytest
|
Complete vec0 DELETE: zero data, reclaim empty chunks, fix metadata rc bug (#268)
When a row is deleted from a vec0 virtual table, the rowid slot in
_chunks.rowids and vector data in _vector_chunksNN.vectors are now
zeroed out (previously left as stale data, tracked in #54). When all
rows in a chunk are deleted (validity bitmap all zeros), the chunk and
its associated vector/metadata shadow table rows are reclaimed.
- Add vec0Update_Delete_ClearRowid to zero the rowid blob slot
- Add vec0Update_Delete_ClearVectors to zero all vector blob slots
- Add vec0Update_Delete_DeleteChunkIfEmpty to detect and delete
fully-empty chunks from _chunks, _vector_chunksNN, _metadatachunksNN
- Fix missing rc check in ClearMetadata loop (bug: errors were silently
ignored)
- Fix vec0_new_chunk to explicitly set _rowid_ on shadow table INSERTs
(SHADOW_TABLE_ROWID_QUIRK: "rowid PRIMARY KEY" without INTEGER type
is not a true rowid alias, causing blob_open failures after chunk
delete+recreate cycles)
- Add 13 new tests covering rowid/vector zeroing, chunk reclamation,
metadata/auxiliary/partition/text-PK/int8/bit variants, and
page_count shrinkage verification
- Add vec0-delete-completeness fuzz target
- Update snapshots for new delete zeroing behavior
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 00:02:36 -07:00
|
|
|
from helpers import _f32, _i64, _int8, exec
|
2026-03-02 18:12:01 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_insert_creates_chunks_and_vectors(db, snapshot):
    """Insert five float[4] vectors with explicit rowids and verify storage.

    The row counts of the ``v_rowids`` and ``v_vector_chunks00`` shadow tables
    are compared against named snapshots, then every vector is read back by
    rowid and compared with the blob that was inserted.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # Explicit rowid -> vector; dict iteration order is the insertion order.
    vectors_by_rowid = {
        1: [1.0, 2.0, 3.0, 4.0],
        2: [5.0, 6.0, 7.0, 8.0],
        3: [0.1, 0.2, 0.3, 0.4],
        4: [10.0, 20.0, 30.0, 40.0],
        5: [0.5, 0.5, 0.5, 0.5],
    }
    for rowid, vector in vectors_by_rowid.items():
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, _f32(vector)])

    rowids_count = exec(db, "select count(*) as cnt from v_rowids")
    assert rowids_count == snapshot(name="rowids_count")

    chunks_count = exec(db, "select count(*) as cnt from v_vector_chunks00")
    assert chunks_count == snapshot(name="vector_chunks_count")

    # Round-trip: exactly one row per rowid, holding the same blob.
    for rowid, vector in vectors_by_rowid.items():
        matches = db.execute("select emb from v where rowid = ?", [rowid]).fetchall()
        assert len(matches) == 1
        assert matches[0][0] == _f32(vector)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_insert_auto_rowid(db):
    """Rows inserted without a rowid must come back as rowids 1, 2, 3 in order."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    unit_vectors = [
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
    for vector in unit_vectors:
        db.execute("insert into v(emb) values (?)", [_f32(vector)])

    listing = db.execute("select rowid from v order by rowid").fetchall()
    assigned = [record[0] for record in listing]
    assert assigned == [1, 2, 3]

    # Each auto-assigned rowid maps back to the vector inserted at that position.
    for position, vector in enumerate(unit_vectors):
        expected_rowid = position + 1
        fetched = db.execute(
            "select emb from v where rowid = ?", [expected_rowid]
        ).fetchone()
        assert fetched[0] == _f32(vector)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_insert_text_primary_key(db, snapshot):
    """Insert two rows keyed by a text primary key and read them back by id.

    The contents of ``v_rowids`` (rowid, id, chunk_id, chunk_offset) are
    compared against the ``rowids`` snapshot.
    """
    vec_a = [1.0, 2.0, 3.0, 4.0]
    vec_b = [5.0, 6.0, 7.0, 8.0]

    db.execute(
        "create virtual table v using vec0(id text primary key, emb float[4], chunk_size=8)"
    )
    db.execute("insert into v(id, emb) values ('doc_a', ?)", [_f32(vec_a)])
    db.execute("insert into v(id, emb) values ('doc_b', ?)", [_f32(vec_b)])

    rowids_dump = exec(
        db, "select rowid, id, chunk_id, chunk_offset from v_rowids order by rowid"
    )
    assert rowids_dump == snapshot(name="rowids")

    fetched_a = db.execute("select emb from v where id = 'doc_a'").fetchone()
    assert fetched_a[0] == _f32(vec_a)

    fetched_b = db.execute("select emb from v where id = 'doc_b'").fetchone()
    assert fetched_b[0] == _f32(vec_b)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_clears_validity(db):
    """Delete the middle of three rows and check it disappears everywhere.

    After the delete, rowid 2 must be absent from both the virtual table and
    the ``v_rowids`` shadow table, and a subsequent insert must still work.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    unit_vectors = [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]]
    for position, vector in enumerate(unit_vectors):
        db.execute(
            "insert into v(rowid, emb) values (?, ?)", [position + 1, _f32(vector)]
        )

    db.execute("delete from v where rowid = 2")

    visible = db.execute("select rowid from v order by rowid").fetchall()
    assert [record[0] for record in visible] == [1, 3]

    shadow = db.execute("select rowid from v_rowids order by rowid").fetchall()
    assert all(record[0] != 2 for record in shadow)

    # Inserting a new vector after deletion still works
    fresh = [0.0, 0.0, 0.0, 1.0]
    db.execute("insert into v(rowid, emb) values (4, ?)", [_f32(fresh)])
    fetched = db.execute("select emb from v where rowid = 4").fetchone()
    assert fetched[0] == _f32(fresh)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_insert_delete_reinsert(db):
    """Insert rowid 1, delete it, insert rowid 2, then check scans and KNN.

    Only rowid 2 may be visible afterwards, and a k=1 KNN query for the second
    vector must return exactly that row.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    ones = [1.0] * 4
    twos = [2.0] * 4
    db.execute("insert into v(rowid, emb) values (1, ?)", [_f32(ones)])
    db.execute("delete from v where rowid = 1")
    db.execute("insert into v(rowid, emb) values (2, ?)", [_f32(twos)])

    remaining = db.execute("select rowid from v order by rowid").fetchall()
    assert [record[0] for record in remaining] == [2]

    # KNN query works and returns rowid 2
    nearest = db.execute(
        "select rowid, distance from v where emb match ? and k = 1",
        [_f32(twos)],
    ).fetchall()
    assert len(nearest) == 1
    assert nearest[0][0] == 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_insert_validates_dimensions(db):
    """Vectors with too few or too many dimensions must be rejected.

    A float[4] column is fed a 3-element and then a 5-element vector; each
    attempt must report an ``OperationalError`` whose message mentions the
    dimension mismatch, the expected size and the offending size.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # (vector of the wrong length, that length as it should appear in the message)
    cases = [
        ([1.0, 2.0, 3.0], "3"),
        ([1.0, 2.0, 3.0, 4.0, 5.0], "5"),
    ]
    for bad_vector, actual_dims in cases:
        outcome = exec(
            db, "insert into v(rowid, emb) values (1, ?)", [_f32(bad_vector)]
        )
        assert outcome["error"] == "OperationalError"
        assert "Dimension mismatch" in outcome["message"]
        assert "Expected 4" in outcome["message"]
        assert actual_dims in outcome["message"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_insert_validates_type(db):
    """Inserting an int8 vector into a float[4] column must produce an error.

    The error message is expected to name both element types involved.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # Four signed bytes, passed through vec_int8() so the value is typed as int8.
    packed_int8 = struct.pack("4b", *(1, 2, 3, 4))
    outcome = exec(
        db,
        "insert into v(rowid, emb) values (1, vec_int8(?))",
        [packed_int8],
    )

    assert "error" in outcome
    for type_name in ("float32", "int8"):
        assert type_name in outcome["message"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_info_table_contents(db, snapshot):
    """Snapshot the ``v_info`` shadow table, excluding version-dependent keys.

    Keys starting with ``CREATE_VERSION`` are left out of the snapshot and are
    only checked for presence, so the snapshot does not change per release.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    stable_info = exec(
        db,
        "select key, value from v_info where key not like 'CREATE_VERSION%' order by key",
    )
    assert stable_info == snapshot()

    # Smoke-check that version keys exist without pinning exact values
    version_info = exec(
        db, "select key from v_info where key like 'CREATE_VERSION%' order by key"
    )
    version_keys = []
    for record in version_info["rows"]:
        version_keys.append(record["key"])
    assert version_keys == [
        "CREATE_VERSION",
        "CREATE_VERSION_MAJOR",
        "CREATE_VERSION_MINOR",
        "CREATE_VERSION_PATCH",
    ]
|
Complete vec0 DELETE: zero data, reclaim empty chunks, fix metadata rc bug (#268)
When a row is deleted from a vec0 virtual table, the rowid slot in
_chunks.rowids and vector data in _vector_chunksNN.vectors are now
zeroed out (previously left as stale data, tracked in #54). When all
rows in a chunk are deleted (validity bitmap all zeros), the chunk and
its associated vector/metadata shadow table rows are reclaimed.
- Add vec0Update_Delete_ClearRowid to zero the rowid blob slot
- Add vec0Update_Delete_ClearVectors to zero all vector blob slots
- Add vec0Update_Delete_DeleteChunkIfEmpty to detect and delete
fully-empty chunks from _chunks, _vector_chunksNN, _metadatachunksNN
- Fix missing rc check in ClearMetadata loop (bug: errors were silently
ignored)
- Fix vec0_new_chunk to explicitly set _rowid_ on shadow table INSERTs
(SHADOW_TABLE_ROWID_QUIRK: "rowid PRIMARY KEY" without INTEGER type
is not a true rowid alias, causing blob_open failures after chunk
delete+recreate cycles)
- Add 13 new tests covering rowid/vector zeroing, chunk reclamation,
metadata/auxiliary/partition/text-PK/int8/bit variants, and
page_count shrinkage verification
- Add vec0-delete-completeness fuzz target
- Update snapshots for new delete zeroing behavior
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 00:02:36 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_zeroes_rowid_blob(db):
    """Deleting a row must zero its slot in the chunk's ``rowids`` blob.

    The blob of chunk 1 is decoded as eight little-endian int64 values
    (chunk_size=8); only the slot of the deleted rowid 2 may change.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    for rowid in (1, 2, 3):
        db.execute(
            "insert into v(rowid, emb) values (?, ?)",
            [rowid, _f32([float(rowid)] * 4)],
        )

    db.execute("delete from v where rowid = 2")

    chunk_row = db.execute("select rowids from v_chunks where rowid = 1").fetchone()
    slots = struct.unpack("<8q", chunk_row[0])
    first, second, third = slots[:3]
    assert first == 1  # slot 0 intact
    assert second == 0  # slot 1 zeroed (was rowid 2)
    assert third == 3  # slot 2 intact
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_zeroes_vector_blob(db):
    """Deleting a row must zero its slot in the chunk's ``vectors`` blob.

    Two float[4] vectors are stored; after deleting rowid 1 the first 16-byte
    slot must decode to zeros while the second slot keeps its values.
    """
    kept = [5.0, 6.0, 7.0, 8.0]
    slot_bytes = 4 * 4  # 4 floats of 4 bytes each

    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
    db.execute("insert into v(rowid, emb) values (1, ?)", [_f32([1.0, 2.0, 3.0, 4.0])])
    db.execute("insert into v(rowid, emb) values (2, ?)", [_f32(kept)])

    db.execute("delete from v where rowid = 1")

    chunk_row = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()
    vectors = chunk_row[0]

    # First slot (4 floats = 16 bytes) should be zeroed
    deleted_slot = struct.unpack("<4f", vectors[:slot_bytes])
    assert deleted_slot == (0.0, 0.0, 0.0, 0.0)

    # Second slot should be unchanged
    surviving_slot = struct.unpack("<4f", vectors[slot_bytes : 2 * slot_bytes])
    assert surviving_slot == (5.0, 6.0, 7.0, 8.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_all_rows_deletes_chunk(db):
    """Deleting all eight rows of a chunk must remove its shadow-table rows.

    Both ``v_chunks`` and ``v_vector_chunks00`` must be empty afterwards, and
    a new insert must still succeed and be readable.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    all_rowids = range(1, 9)
    for rowid in all_rowids:
        db.execute(
            "insert into v(rowid, emb) values (?, ?)",
            [rowid, _f32([float(rowid)] * 4)],
        )
    for rowid in all_rowids:
        db.execute("delete from v where rowid = ?", [rowid])

    for shadow_table in ("v_chunks", "v_vector_chunks00"):
        remaining = db.execute(f"select count(*) from {shadow_table}").fetchone()[0]
        assert remaining == 0

    # Inserting after full deletion still works
    nines = [9.0, 9.0, 9.0, 9.0]
    db.execute("insert into v(rowid, emb) values (100, ?)", [_f32(nines)])
    fetched = db.execute("select emb from v where rowid = 100").fetchone()
    assert fetched[0] == _f32(nines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_chunk_multiple_chunks(db):
    """With 16 rows inserted, deleting rows 1-8 must leave exactly one chunk.

    Rows 9-16 must remain readable with their original vectors.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    doomed = range(1, 9)
    survivors = range(9, 17)

    for rowid in (*doomed, *survivors):
        db.execute(
            "insert into v(rowid, emb) values (?, ?)",
            [rowid, _f32([float(rowid)] * 4)],
        )

    # Delete all rows from the first chunk (rows 1-8)
    for rowid in doomed:
        db.execute("delete from v where rowid = ?", [rowid])

    # Only 1 chunk should remain
    chunk_count = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunk_count == 1

    # Rows 9-16 still queryable
    for rowid in survivors:
        fetched = db.execute("select emb from v where rowid = ?", [rowid]).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_with_metadata_columns(db):
    """Deleting a full chunk must also empty every metadata shadow table.

    The table has four metadata columns (boolean, integer, float, text); after
    all eight rows are deleted, ``v_chunks``, ``v_vector_chunks00`` and
    ``v_metadatachunks00`` .. ``v_metadatachunks03`` must hold no rows.
    """
    column_defs = ", ".join(
        [
            "emb float[4]",
            "m_bool boolean",
            "m_int integer",
            "m_float float",
            "m_text text",
            "chunk_size=8",
        ]
    )
    db.execute(f"create virtual table v using vec0({column_defs})")

    insert_sql = (
        "insert into v(rowid, emb, m_bool, m_int, m_float, m_text) "
        "values (?, ?, ?, ?, ?, ?)"
    )
    for i in range(1, 9):
        params = [
            i,
            _f32([float(i)] * 4),
            i % 2 == 0,
            i * 10,
            float(i) / 2.0,
            f"text_{i}",
        ]
        db.execute(insert_sql, params)

    for i in range(1, 9):
        db.execute("delete from v where rowid = ?", [i])

    shadow_tables = (
        "v_chunks",
        "v_vector_chunks00",
        "v_metadatachunks00",
        "v_metadatachunks01",
        "v_metadatachunks02",
        "v_metadatachunks03",
    )
    for shadow_table in shadow_tables:
        remaining = db.execute(f"select count(*) from {shadow_table}").fetchone()[0]
        assert remaining == 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_with_auxiliary_columns(db):
    """Deleting every row must empty both ``v_chunks`` and ``v_auxiliary``.

    The table declares one auxiliary text column (``+aux_text``).
    """
    db.execute(
        "create virtual table v using vec0(emb float[4], +aux_text text, chunk_size=8)"
    )

    all_rowids = range(1, 9)
    for rowid in all_rowids:
        db.execute(
            "insert into v(rowid, emb, aux_text) values (?, ?, ?)",
            [rowid, _f32([float(rowid)] * 4), f"aux_{rowid}"],
        )
    for rowid in all_rowids:
        db.execute("delete from v where rowid = ?", [rowid])

    for shadow_table in ("v_chunks", "v_auxiliary"):
        remaining = db.execute(f"select count(*) from {shadow_table}").fetchone()[0]
        assert remaining == 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_with_text_primary_key(db):
    """Deleting by text primary key must zero the vector slot of that row.

    Row ``'a'`` is deleted; the first 16 bytes of the chunk's vector blob must
    then decode to zeros, and row ``'b'`` must remain readable.
    """
    kept = [5.0, 6.0, 7.0, 8.0]

    db.execute(
        "create virtual table v using vec0(id text primary key, emb float[4], chunk_size=8)"
    )
    db.execute("insert into v(id, emb) values ('a', ?)", [_f32([1.0, 2.0, 3.0, 4.0])])
    db.execute("insert into v(id, emb) values ('b', ?)", [_f32(kept)])

    db.execute("delete from v where id = 'a'")

    # Vector blob slot 0 should be zeroed
    chunk_row = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()
    deleted_slot = struct.unpack("<4f", chunk_row[0][:16])
    assert deleted_slot == (0.0, 0.0, 0.0, 0.0)

    # Remaining row still queryable
    fetched = db.execute("select emb from v where id = 'b'").fetchone()
    assert fetched[0] == _f32(kept)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_with_partition_keys(db):
    """Deleting every row of one partition must leave the other one untouched.

    Rows 1-8 go to partition 'A' and rows 9-16 to partition 'B'. After all of
    'A' is deleted, one chunk must remain, 'B' must be fully readable, and a
    new insert into 'A' must succeed.
    """
    db.execute(
        "create virtual table v using vec0(part text partition key, emb float[4], chunk_size=8)"
    )

    rowids_a = range(1, 9)
    rowids_b = range(9, 17)
    inserts = (
        ("insert into v(rowid, part, emb) values (?, 'A', ?)", rowids_a),
        ("insert into v(rowid, part, emb) values (?, 'B', ?)", rowids_b),
    )
    for insert_sql, rowids in inserts:
        for rowid in rowids:
            db.execute(insert_sql, [rowid, _f32([float(rowid)] * 4)])

    # Delete all from partition A
    for rowid in rowids_a:
        db.execute("delete from v where rowid = ?", [rowid])

    # 1 chunk should remain (partition B's)
    chunk_count = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunk_count == 1

    # Partition B rows intact
    for rowid in rowids_b:
        fetched = db.execute("select emb from v where rowid = ?", [rowid]).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)

    # Re-insert into partition A works
    replacement = [99.0, 99.0, 99.0, 99.0]
    db.execute(
        "insert into v(rowid, part, emb) values (100, 'A', ?)",
        [_f32(replacement)],
    )
    fetched = db.execute("select emb from v where rowid = 100").fetchone()
    assert fetched[0] == _f32(replacement)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_int8_vectors(db):
    """Deleting a row from an int8[4] table must zero its 4-byte vector slot.

    The slot of the remaining row (rowid 2) must keep its values.
    """
    slot_bytes = 4  # int8[4] = 4 bytes per slot

    db.execute("create virtual table v using vec0(emb int8[4], chunk_size=8)")
    db.execute(
        "insert into v(rowid, emb) values (1, vec_int8(?))", [_int8([1, 2, 3, 4])]
    )
    db.execute(
        "insert into v(rowid, emb) values (2, vec_int8(?))", [_int8([5, 6, 7, 8])]
    )

    db.execute("delete from v where rowid = 1")

    chunk_row = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()
    vectors = chunk_row[0]

    deleted_slot = struct.unpack("<4b", vectors[:slot_bytes])
    assert deleted_slot == (0, 0, 0, 0)

    surviving_slot = struct.unpack("<4b", vectors[slot_bytes : 2 * slot_bytes])
    assert surviving_slot == (5, 6, 7, 8)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_bit_vectors(db):
    """Deleting a row from a bit[8] table must zero its 1-byte vector slot.

    The slot of the remaining row (rowid 2) must keep its 0xAA pattern.
    """
    db.execute("create virtual table v using vec0(emb bit[8], chunk_size=8)")

    all_ones = bytes([0xFF])
    alternating = bytes([0xAA])
    db.execute("insert into v(rowid, emb) values (1, vec_bit(?))", [all_ones])
    db.execute("insert into v(rowid, emb) values (2, vec_bit(?))", [alternating])

    db.execute("delete from v where rowid = 1")

    chunk_row = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()
    vectors = chunk_row[0]

    # bit[8] = 1 byte per slot
    assert vectors[0:1] == b"\x00"
    assert vectors[1:2] == alternating
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _file_db(tmp_path):
    """Open a file-backed DB (required for page_count to shrink after VACUUM).

    Opens ``test.db`` inside *tmp_path*, installs ``sqlite3.Row`` as the row
    factory and loads the ``dist/vec0`` extension.

    Args:
        tmp_path: Directory (path-like supporting ``/``) that will hold the
            database file.

    Returns:
        The open ``sqlite3.Connection``. The caller is responsible for
        closing it.

    Raises:
        Whatever ``sqlite3`` raises when the extension cannot be loaded; in
        that case the connection is closed before the error propagates.
    """
    db = sqlite3.connect(str(tmp_path / "test.db"))
    try:
        db.row_factory = sqlite3.Row
        db.enable_load_extension(True)
        try:
            db.load_extension("dist/vec0")
        finally:
            # Never leave extension loading switched on, even if the load failed.
            db.enable_load_extension(False)
    except Exception:
        # Do not leak an open handle on the temp file when setup fails.
        db.close()
        raise
    return db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_chunk_shrinks_pages(tmp_path):
    """Use large vectors (float[256]) so each chunk blob spans multiple pages,
    making the page_count difference measurable after VACUUM.

    Inserts 24 rows (three chunks at chunk_size=8), deletes all of them,
    checks that ``v_chunks`` is empty and that ``pragma page_count`` is lower
    after ``vacuum`` than it was before the deletes.
    """
    dims = 256
    db = _file_db(tmp_path)
    # try/finally so the file-backed connection is released even when an
    # assertion fails; otherwise the handle on the temp file would leak.
    try:
        db.execute(f"create virtual table v using vec0(emb float[{dims}], chunk_size=8)")

        for i in range(1, 25):  # 3 full chunks of 8
            db.execute(
                "insert into v(rowid, emb) values (?, ?)",
                [i, _f32([float(i)] * dims)],
            )
        db.commit()
        pages_before = db.execute("pragma page_count").fetchone()[0]

        # Delete all rows
        for i in range(1, 25):
            db.execute("delete from v where rowid = ?", [i])
        db.commit()

        assert db.execute("select count(*) from v_chunks").fetchone()[0] == 0

        db.execute("vacuum")
        pages_after = db.execute("pragma page_count").fetchone()[0]
        assert pages_after < pages_before, (
            f"page_count should shrink after deleting all chunks and vacuum: "
            f"{pages_before} -> {pages_after}"
        )
    finally:
        db.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_one_chunk_of_two_shrinks_pages(tmp_path):
    """Use large vectors (float[256]) so each chunk blob spans multiple pages,
    making the page_count difference measurable after VACUUM.

    Inserts 16 rows (two chunks at chunk_size=8), deletes rows 1-8, checks
    that one chunk remains, that ``pragma page_count`` is lower after
    ``vacuum``, and that rows 9-16 are still readable afterwards.
    """
    dims = 256
    db = _file_db(tmp_path)
    # try/finally so the file-backed connection is released even when an
    # assertion fails; otherwise the handle on the temp file would leak.
    try:
        db.execute(f"create virtual table v using vec0(emb float[{dims}], chunk_size=8)")

        for i in range(1, 17):  # 2 full chunks of 8
            db.execute(
                "insert into v(rowid, emb) values (?, ?)",
                [i, _f32([float(i)] * dims)],
            )
        db.commit()
        pages_before = db.execute("pragma page_count").fetchone()[0]

        # Delete all rows from the first chunk (rows 1-8)
        for i in range(1, 9):
            db.execute("delete from v where rowid = ?", [i])
        db.commit()

        assert db.execute("select count(*) from v_chunks").fetchone()[0] == 1

        db.execute("vacuum")
        pages_after = db.execute("pragma page_count").fetchone()[0]
        assert pages_after < pages_before, (
            f"page_count should shrink after deleting one chunk and vacuum: "
            f"{pages_before} -> {pages_after}"
        )

        # Remaining rows still queryable after vacuum
        for i in range(9, 17):
            row = db.execute("select emb from v where rowid = ?", [i]).fetchone()
            assert row[0] == _f32([float(i)] * dims)
    finally:
        db.close()
|
2026-03-31 17:43:49 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_wal_concurrent_reader_during_write(tmp_path):
    """In WAL mode, a reader should see a consistent snapshot while a writer inserts.

    A writer connection creates the table and commits 10 rows. A second
    connection then reads while the writer holds an open transaction with 10
    more rows: the reader must keep seeing 10 rows (count and KNN), and must
    see 20 once the writer commits.
    """
    dims = 4
    db_path = str(tmp_path / "test.db")

    writer = sqlite3.connect(db_path)
    reader = None
    # try/finally so both connections are released even when an assertion fails.
    try:
        # Writer: create table, insert initial rows, enable WAL
        writer.enable_load_extension(True)
        writer.load_extension("dist/vec0")
        writer.enable_load_extension(False)

        # The pragma returns the journal mode actually in effect; if WAL could
        # not be enabled the rest of the test would not be exercising WAL.
        journal_mode = writer.execute("PRAGMA journal_mode=WAL").fetchone()[0]
        assert journal_mode == "wal", f"WAL mode not enabled: {journal_mode!r}"

        writer.execute(f"CREATE VIRTUAL TABLE v USING vec0(emb float[{dims}])")
        for i in range(1, 11):
            writer.execute(
                "INSERT INTO v(rowid, emb) VALUES (?, ?)", [i, _f32([float(i)] * dims)]
            )
        writer.commit()

        # Reader: open separate connection, start read
        reader = sqlite3.connect(db_path)
        reader.enable_load_extension(True)
        reader.load_extension("dist/vec0")
        reader.enable_load_extension(False)

        # Reader sees 10 rows
        count_before = reader.execute("SELECT count(*) FROM v").fetchone()[0]
        assert count_before == 10

        # Writer inserts more rows (not yet committed)
        writer.execute("BEGIN")
        for i in range(11, 21):
            writer.execute(
                "INSERT INTO v(rowid, emb) VALUES (?, ?)", [i, _f32([float(i)] * dims)]
            )

        # Reader still sees 10 (WAL snapshot isolation)
        count_during = reader.execute("SELECT count(*) FROM v").fetchone()[0]
        assert count_during == 10

        # KNN during writer's transaction should work on reader's snapshot
        rows = reader.execute(
            "SELECT rowid FROM v WHERE emb MATCH ? AND k = 5",
            [_f32([1.0] * dims)],
        ).fetchall()
        assert len(rows) == 5
        assert all(r[0] <= 10 for r in rows)  # only original rows

        # Writer commits
        writer.commit()

        # Reader sees new rows after re-query (new snapshot)
        count_after = reader.execute("SELECT count(*) FROM v").fetchone()[0]
        assert count_after == 20
    finally:
        writer.close()
        if reader is not None:
            reader.close()
|