Compare commits

..

4 commits

Author SHA1 Message Date
Alex Garcia
04d28bd217 v0.1.10-alpha.4 2026-05-17 23:50:43 -07:00
Alex Garcia
fe941716ad Finalize all cached vec0 stmts on commit (fixes #295)
vec0Sync only finalized stmtLatestChunk and the four stmtRowids* stmts.
The IVF/DiskANN/vectors stmts persisted on the vtab until xDisconnect,
which blocked sqlite3_close() (non-v2) with SQLITE_BUSY — the original
mozStorage case from #295. The same leak also broke VACUUM with
"SQL statements in progress" after any DiskANN operation.

Switch vec0Sync to call vec0_free_resources, which already finalizes
the full cache. Also fix a latent bug: the DiskANN block in
vec0_free_resources was nested inside #if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE,
so in the default build (DiskANN on, IVF off) those finalizes were
unreachable even from xDisconnect/xDestroy. Split into two independent
#if guards.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 23:49:05 -07:00
Alex Garcia
8b81f40d1e Rename all IVF shadow tables in vec0Rename
vec0Rename only emitted ALTER TABLE on `<name>_ivf_cells%02d`, so renaming
an IVF-indexed vec0 table left `_ivf_centroids`, `_ivf_rowid_map`, and
`_ivf_vectors` (when quantizer != none) with the old prefix. Subsequent
queries against the renamed table broke, and DROP TABLE left those three
shadows orphaned in the schema. Same shape as the DiskANN/rescore bug fixed
in #294, just for the IVF branch.

Mirror ivf_create_shadow_tables: emit ALTER for all four IVF shadows,
gating `_ivf_vectors` on quantizer != VEC0_IVF_QUANTIZER_NONE.

Adds test-ivf-rename.py (auto-skipped on default builds via conftest's
test-ivf prefix rule) covering quantizer=none, quantizer=binary, and
DROP-after-rename. Also adds a rescore rename regression test to
test-rename.py to lock down the (already-correct) rescore path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 23:09:27 -07:00
Rolf Rando
8105eee61e
Skip _vector_chunks rename for non-FLAT vec0 columns (#294)
vec0Rename emits an unconditional ALTER TABLE on `<name>_vector_chunks%02d`
for every vector column, but non-FLAT columns (rescore, IVF, DiskANN) don't
create that shadow table — so ALTER TABLE RENAME on a DiskANN-indexed (or
rescore/IVF) vec0 table fails with `no such table` and leaves any cached
prepared statements still referencing the old name.

Mirror the guard already used at create time in vec0_init around
VEC0_SHADOW_VECTOR_N_CREATE: only rename `_vector_chunks` when the column's
index_type is VEC0_INDEX_TYPE_FLAT.

Adds a regression test exercising rename on a DiskANN-indexed table.
2026-05-17 22:56:52 -07:00
5 changed files with 219 additions and 27 deletions

View file

@ -1 +1 @@
0.1.10-alpha.3
0.1.10-alpha.4

View file

@ -3695,13 +3695,15 @@ void vec0_free_resources(vec0_vtab *p) {
sqlite3_finalize(p->stmtIvfRowidMapLookup[i]); p->stmtIvfRowidMapLookup[i] = NULL;
sqlite3_finalize(p->stmtIvfRowidMapDelete[i]); p->stmtIvfRowidMapDelete[i] = NULL;
sqlite3_finalize(p->stmtIvfCentroidsAll[i]); p->stmtIvfCentroidsAll[i] = NULL;
}
#endif
#if SQLITE_VEC_ENABLE_DISKANN
for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
sqlite3_finalize(p->stmtDiskannNodeRead[i]); p->stmtDiskannNodeRead[i] = NULL;
sqlite3_finalize(p->stmtDiskannNodeWrite[i]); p->stmtDiskannNodeWrite[i] = NULL;
sqlite3_finalize(p->stmtDiskannNodeInsert[i]); p->stmtDiskannNodeInsert[i] = NULL;
sqlite3_finalize(p->stmtVectorsRead[i]); p->stmtVectorsRead[i] = NULL;
sqlite3_finalize(p->stmtVectorsInsert[i]); p->stmtVectorsInsert[i] = NULL;
#endif
}
#endif
}
@ -10370,28 +10372,7 @@ static int vec0Begin(sqlite3_vtab *pVTab) {
return SQLITE_OK;
}
static int vec0Sync(sqlite3_vtab *pVTab) {
UNUSED_PARAMETER(pVTab);
vec0_vtab *p = (vec0_vtab *)pVTab;
if (p->stmtLatestChunk) {
sqlite3_finalize(p->stmtLatestChunk);
p->stmtLatestChunk = NULL;
}
if (p->stmtRowidsInsertRowid) {
sqlite3_finalize(p->stmtRowidsInsertRowid);
p->stmtRowidsInsertRowid = NULL;
}
if (p->stmtRowidsInsertId) {
sqlite3_finalize(p->stmtRowidsInsertId);
p->stmtRowidsInsertId = NULL;
}
if (p->stmtRowidsUpdatePosition) {
sqlite3_finalize(p->stmtRowidsUpdatePosition);
p->stmtRowidsUpdatePosition = NULL;
}
if (p->stmtRowidsGetChunkPosition) {
sqlite3_finalize(p->stmtRowidsGetChunkPosition);
p->stmtRowidsGetChunkPosition = NULL;
}
vec0_free_resources((vec0_vtab *)pVTab);
return SQLITE_OK;
}
static int vec0Commit(sqlite3_vtab *pVTab) {
@ -10435,9 +10416,13 @@ static int vec0Rename(sqlite3_vtab *pVtab, const char *zNew) {
// Per-vector-column shadow tables
for (int i = 0; i < p->numVectorColumns; i++) {
sqlite3_str_appendf(s,
"ALTER TABLE \"%w\".\"%w_vector_chunks%02d\" RENAME TO \"%w_vector_chunks%02d\";",
p->schemaName, p->tableName, i, zNew, i);
// Non-FLAT columns (rescore, IVF, DiskANN) don't create _vector_chunks
// (mirror the guard in vec0_init around VEC0_SHADOW_VECTOR_N_CREATE).
if (p->vector_columns[i].index_type == VEC0_INDEX_TYPE_FLAT) {
sqlite3_str_appendf(s,
"ALTER TABLE \"%w\".\"%w_vector_chunks%02d\" RENAME TO \"%w_vector_chunks%02d\";",
p->schemaName, p->tableName, i, zNew, i);
}
#if SQLITE_VEC_ENABLE_RESCORE
if (p->shadowRescoreChunksNames[i]) {
@ -10468,9 +10453,22 @@ static int vec0Rename(sqlite3_vtab *pVtab, const char *zNew) {
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
for (int i = 0; i < p->numVectorColumns; i++) {
if (p->shadowIvfCellsNames[i]) {
sqlite3_str_appendf(s,
"ALTER TABLE \"%w\".\"%w_ivf_centroids%02d\" RENAME TO \"%w_ivf_centroids%02d\";",
p->schemaName, p->tableName, i, zNew, i);
sqlite3_str_appendf(s,
"ALTER TABLE \"%w\".\"%w_ivf_cells%02d\" RENAME TO \"%w_ivf_cells%02d\";",
p->schemaName, p->tableName, i, zNew, i);
sqlite3_str_appendf(s,
"ALTER TABLE \"%w\".\"%w_ivf_rowid_map%02d\" RENAME TO \"%w_ivf_rowid_map%02d\";",
p->schemaName, p->tableName, i, zNew, i);
// _ivf_vectors is only created when quantizer != none
// (mirror ivf_create_shadow_tables in sqlite-vec-ivf.c).
if (p->vector_columns[i].ivf.quantizer != VEC0_IVF_QUANTIZER_NONE) {
sqlite3_str_appendf(s,
"ALTER TABLE \"%w\".\"%w_ivf_vectors%02d\" RENAME TO \"%w_ivf_vectors%02d\";",
p->schemaName, p->tableName, i, zNew, i);
}
}
}
#endif

View file

@ -0,0 +1,34 @@
"""Regression tests for #295: vec0 must finalize cached prepared statements
on every commit, not just the rowid subset.
Before the fix, `vec0Sync` only finalized `stmtLatestChunk` and the four
`stmtRowids*` stmts; the DiskANN/IVF/vectors-read stmts persisted on the
vtab indefinitely. Symptom: VACUUM after any DiskANN operation failed with
"SQL statements in progress" because the cached stmts kept the connection
busy. (The same leak also caused `sqlite3_close()` non-v2 to return
SQLITE_BUSY — the original Firefox case in issue #295.)
A separate latent bug: the DiskANN finalize block in `vec0_free_resources`
was nested inside `#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE`, so even
xDisconnect/xDestroy didn't finalize DiskANN stmts in the default build.
"""
from helpers import _f32
def test_vacuum_after_diskann_inserts(db):
    """VACUUM must run without error after committed inserts into a
    DiskANN-indexed vec0 table."""
    create_sql = (
        "create virtual table v using vec0("
        "a float[8] indexed by diskann(neighbor_quantizer=binary))"
    )
    insert_sql = "insert into v(rowid, a) values (?, ?)"

    db.execute(create_sql)
    for rowid in range(1, 11):
        # Each row stores a constant 8-element vector scaled by its rowid.
        db.execute(insert_sql, (rowid, _f32([0.1 * rowid] * 8)))
    db.commit()

    # Any exception raised by VACUUM fails the test.
    db.execute("VACUUM")
def test_vacuum_after_flat_inserts(db):
    """VACUUM must run without error after a committed insert into a vec0
    table declared without an index clause."""
    db.execute("create virtual table v using vec0(a float[2])")
    vector = _f32([0.1, 0.2])
    db.execute("insert into v(rowid, a) values (1, ?)", (vector,))
    db.commit()

    # Any exception raised by VACUUM fails the test.
    db.execute("VACUUM")

98
tests/test-ivf-rename.py Normal file
View file

@ -0,0 +1,98 @@
import sqlite3
import pytest
from helpers import _f32
def _shadow_tables(db, prefix):
"""Return sorted list of shadow table names for a given prefix."""
return sorted([
row[0] for row in db.execute(
r"select name from sqlite_master where name like ? escape '\' and type='table' order by 1",
[f"{prefix}\\__%"],
).fetchall()
])
def test_rename_ivf_no_quantizer(db):
    """Renaming an IVF table with quantizer=none must rename _ivf_centroids,
    _ivf_cells and _ivf_rowid_map. With quantizer=none there is no
    _ivf_vectors table to rename."""
    db.execute(
        "\n"
        "CREATE VIRTUAL TABLE v USING vec0(\n"
        "a float[4] indexed by ivf(nlist=2, quantizer=none)\n"
        ")\n"
    )
    db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 4)])
    db.execute("insert into v(rowid, a) values (2, ?)", [_f32([0.9] * 4)])

    old_shadows = _shadow_tables(db, "v")
    for expected in ("v_ivf_centroids00", "v_ivf_cells00", "v_ivf_rowid_map00"):
        assert expected in old_shadows
    # quantizer=none -> no _ivf_vectors; this column has no _vector_chunks either.
    for absent in ("v_ivf_vectors00", "v_vector_chunks00"):
        assert absent not in old_shadows

    db.execute("ALTER TABLE v RENAME TO v2")

    # A KNN query through the new name must still find rowid 1.
    query_vec = _f32([0.1] * 4)
    hits = db.execute(
        "select rowid from v2 where a match ? and k=10", [query_vec]
    ).fetchall()
    assert any(hit[0] == 1 for hit in hits)

    new_shadows = _shadow_tables(db, "v2")
    for expected in ("v2_ivf_centroids00", "v2_ivf_cells00", "v2_ivf_rowid_map00"):
        assert expected in new_shadows

    # Nothing may be left behind under the old prefix.
    assert _shadow_tables(db, "v") == []
def test_rename_ivf_quantizer_binary(db):
    """With quantizer=binary an _ivf_vectors shadow table exists, and a
    rename must carry it over along with the other IVF shadow tables."""
    db.execute(
        "\n"
        "CREATE VIRTUAL TABLE v USING vec0(\n"
        "a float[8] indexed by ivf(nlist=2, quantizer=binary)\n"
        ")\n"
    )
    db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])

    old_shadows = _shadow_tables(db, "v")
    for expected in (
        "v_ivf_centroids00",
        "v_ivf_cells00",
        "v_ivf_rowid_map00",
        "v_ivf_vectors00",  # quantizer=binary creates _ivf_vectors
    ):
        assert expected in old_shadows

    db.execute("ALTER TABLE v RENAME TO v2")

    query_vec = _f32([0.1] * 8)
    hits = db.execute(
        "select rowid from v2 where a match ? and k=10", [query_vec]
    ).fetchall()
    assert hits[0][0] == 1

    new_shadows = _shadow_tables(db, "v2")
    for expected in (
        "v2_ivf_centroids00",
        "v2_ivf_cells00",
        "v2_ivf_rowid_map00",
        "v2_ivf_vectors00",
    ):
        assert expected in new_shadows

    # Nothing may be left behind under the old prefix.
    assert _shadow_tables(db, "v") == []
def test_rename_ivf_drop_after(db):
    """Dropping a renamed IVF table must leave no shadow tables under either
    the old or the new prefix."""
    db.execute(
        "\n"
        "CREATE VIRTUAL TABLE v USING vec0(\n"
        "a float[8] indexed by ivf(nlist=2, quantizer=binary)\n"
        ")\n"
    )
    db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])

    db.execute("ALTER TABLE v RENAME TO v2")
    db.execute("DROP TABLE v2")

    leftovers_old = _shadow_tables(db, "v")
    assert leftovers_old == []
    leftovers_new = _shadow_tables(db, "v2")
    assert leftovers_new == []

View file

@ -162,6 +162,68 @@ def test_rename_with_metadata(db):
assert _shadow_tables(db, "v") == []
def test_rename_diskann(db):
    """ALTER TABLE ... RENAME must succeed on a DiskANN-indexed table, which
    has no _vector_chunks shadow table."""
    db.execute(
        "\n"
        "CREATE VIRTUAL TABLE v USING vec0(\n"
        "a float[8] INDEXED BY diskann(neighbor_quantizer=binary)\n"
        ")\n"
    )
    db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])

    # The rename has to skip the _vector_chunks ALTER, because that table
    # does not exist for this column.
    old_shadows = _shadow_tables(db, "v")
    assert "v_diskann_nodes00" in old_shadows
    assert "v_vector_chunks00" not in old_shadows

    db.execute("ALTER TABLE v RENAME TO v2")

    query_vec = _f32([0.1] * 8)
    hits = db.execute(
        "select rowid from v2 where a match ? and k=10", [query_vec]
    ).fetchall()
    assert hits[0][0] == 1

    new_shadows = _shadow_tables(db, "v2")
    assert "v2_diskann_nodes00" in new_shadows
    assert "v2_vector_chunks00" not in new_shadows

    # Nothing may be left behind under the old prefix.
    assert _shadow_tables(db, "v") == []
def test_rename_rescore(db):
    """ALTER TABLE ... RENAME must succeed on a rescore-indexed table and
    rename both rescore shadow tables; no _vector_chunks table exists."""
    db.execute(
        "\n"
        "CREATE VIRTUAL TABLE v USING vec0(\n"
        "a float[8] indexed by rescore(quantizer=bit)\n"
        ")\n"
    )
    db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])
    db.execute("insert into v(rowid, a) values (2, ?)", [_f32([0.9] * 8)])

    # Rescore columns are backed by _rescore_chunks / _rescore_vectors, so
    # the rename must skip _vector_chunks and move both rescore tables.
    old_shadows = _shadow_tables(db, "v")
    for expected in ("v_rescore_chunks00", "v_rescore_vectors00"):
        assert expected in old_shadows
    assert "v_vector_chunks00" not in old_shadows

    db.execute("ALTER TABLE v RENAME TO v2")

    query_vec = _f32([0.1] * 8)
    hits = db.execute(
        "select rowid from v2 where a match ? and k=10", [query_vec]
    ).fetchall()
    assert hits[0][0] == 1

    new_shadows = _shadow_tables(db, "v2")
    for expected in ("v2_rescore_chunks00", "v2_rescore_vectors00"):
        assert expected in new_shadows
    assert "v2_vector_chunks00" not in new_shadows

    # Nothing may be left behind under the old prefix.
    assert _shadow_tables(db, "v") == []
def test_rename_drop_after(db):
"""DROP TABLE should work on a renamed table."""
db.execute("create virtual table v using vec0(a float[2], chunk_size=8)")