From 8105eee61e2cbdae6e335c32fa5e59f897bc1cd5 Mon Sep 17 00:00:00 2001 From: Rolf Rando <119353883+rolf-moz@users.noreply.github.com> Date: Sun, 17 May 2026 22:56:52 -0700 Subject: [PATCH 1/4] Skip _vector_chunks rename for non-FLAT vec0 columns (#294) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vec0Rename emits an unconditional ALTER TABLE on `_vector_chunks%02d` for every vector column, but non-FLAT columns (rescore, IVF, DiskANN) don't create that shadow table — so ALTER TABLE RENAME on a DiskANN-indexed (or rescore/IVF) vec0 table fails with `no such table` and leaves any cached prepared statements still referencing the old name. Mirror the guard already used at create time in vec0_init around VEC0_SHADOW_VECTOR_N_CREATE: only rename `_vector_chunks` when the column's index_type is VEC0_INDEX_TYPE_FLAT. Adds a regression test exercising rename on a DiskANN-indexed table. --- sqlite-vec.c | 10 +++++++--- tests/test-rename.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/sqlite-vec.c b/sqlite-vec.c index dc33c67..669fc8b 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -10435,9 +10435,13 @@ static int vec0Rename(sqlite3_vtab *pVtab, const char *zNew) { // Per-vector-column shadow tables for (int i = 0; i < p->numVectorColumns; i++) { - sqlite3_str_appendf(s, - "ALTER TABLE \"%w\".\"%w_vector_chunks%02d\" RENAME TO \"%w_vector_chunks%02d\";", - p->schemaName, p->tableName, i, zNew, i); + // Non-FLAT columns (rescore, IVF, DiskANN) don't create _vector_chunks + // (mirror the guard in vec0_init around VEC0_SHADOW_VECTOR_N_CREATE). + if (p->vector_columns[i].index_type == VEC0_INDEX_TYPE_FLAT) { + sqlite3_str_appendf(s, + "ALTER TABLE \"%w\".\"%w_vector_chunks%02d\" RENAME TO \"%w_vector_chunks%02d\";", + p->schemaName, p->tableName, i, zNew, i); + } #if SQLITE_VEC_ENABLE_RESCORE if (p->shadowRescoreChunksNames[i]) { diff --git a/tests/test-rename.py b/tests/test-rename.py index 6da9d32..3c1007e 100644 --- a/tests/test-rename.py +++ b/tests/test-rename.py @@ -162,6 +162,35 @@ def test_rename_with_metadata(db): assert _shadow_tables(db, "v") == [] +def test_rename_diskann(db): + """Rename should work on DiskANN-indexed tables (no _vector_chunks shadow).""" + db.execute(""" + CREATE VIRTUAL TABLE v USING vec0( + a float[8] INDEXED BY diskann(neighbor_quantizer=binary) + ) + """) + db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)]) + + # DiskANN columns use _vectors / _diskann_nodes / _diskann_buffer instead + # of _vector_chunks; the rename must skip the missing _vector_chunks ALTER. + before = _shadow_tables(db, "v") + assert "v_diskann_nodes00" in before + assert "v_vector_chunks00" not in before + + db.execute("ALTER TABLE v RENAME TO v2") + + rows = db.execute( + "select rowid from v2 where a match ? and k=10", + [_f32([0.1] * 8)], + ).fetchall() + assert rows[0][0] == 1 + + after = _shadow_tables(db, "v2") + assert "v2_diskann_nodes00" in after + assert "v2_vector_chunks00" not in after + assert _shadow_tables(db, "v") == [] + + def test_rename_drop_after(db): """DROP TABLE should work on a renamed table.""" db.execute("create virtual table v using vec0(a float[2], chunk_size=8)") From 8b81f40d1e7153c92d3418b9c359fbb1702f123a Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Sun, 17 May 2026 23:09:27 -0700 Subject: [PATCH 2/4] Rename all IVF shadow tables in vec0Rename vec0Rename only emitted ALTER TABLE on `_ivf_cells%02d`, so renaming an IVF-indexed vec0 table left `_ivf_centroids`, `_ivf_rowid_map`, and `_ivf_vectors` (when quantizer != none) with the old prefix. Subsequent queries against the renamed table broke, and DROP TABLE left those three shadows orphaned in the schema. Same shape as the DiskANN/rescore bug fixed in #294, just for the IVF branch. Mirror ivf_create_shadow_tables: emit ALTER for all four IVF shadows, gating `_ivf_vectors` on quantizer != VEC0_IVF_QUANTIZER_NONE. Adds test-ivf-rename.py (auto-skipped on default builds via conftest's test-ivf prefix rule) covering quantizer=none, quantizer=binary, and DROP-after-rename. Also adds a rescore rename regression test to test-rename.py to lock down the (already-correct) rescore path. Co-Authored-By: Claude Opus 4.7 (1M context) --- sqlite-vec.c | 13 ++++++ tests/test-ivf-rename.py | 98 ++++++++++++++++++++++++++++++++++++++++ tests/test-rename.py | 33 ++++++++++++++ 3 files changed, 144 insertions(+) create mode 100644 tests/test-ivf-rename.py diff --git a/sqlite-vec.c b/sqlite-vec.c index 669fc8b..0a33577 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -10472,9 +10472,22 @@ static int vec0Rename(sqlite3_vtab *pVtab, const char *zNew) { #if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE for (int i = 0; i < p->numVectorColumns; i++) { if (p->shadowIvfCellsNames[i]) { + sqlite3_str_appendf(s, + "ALTER TABLE \"%w\".\"%w_ivf_centroids%02d\" RENAME TO \"%w_ivf_centroids%02d\";", + p->schemaName, p->tableName, i, zNew, i); sqlite3_str_appendf(s, "ALTER TABLE \"%w\".\"%w_ivf_cells%02d\" RENAME TO \"%w_ivf_cells%02d\";", p->schemaName, p->tableName, i, zNew, i); + sqlite3_str_appendf(s, + "ALTER TABLE \"%w\".\"%w_ivf_rowid_map%02d\" RENAME TO \"%w_ivf_rowid_map%02d\";", + p->schemaName, p->tableName, i, zNew, i); + // _ivf_vectors is only created when quantizer != none + // (mirror ivf_create_shadow_tables in sqlite-vec-ivf.c). + if (p->vector_columns[i].ivf.quantizer != VEC0_IVF_QUANTIZER_NONE) { + sqlite3_str_appendf(s, + "ALTER TABLE \"%w\".\"%w_ivf_vectors%02d\" RENAME TO \"%w_ivf_vectors%02d\";", + p->schemaName, p->tableName, i, zNew, i); + } } } #endif diff --git a/tests/test-ivf-rename.py b/tests/test-ivf-rename.py new file mode 100644 index 0000000..980c958 --- /dev/null +++ b/tests/test-ivf-rename.py @@ -0,0 +1,98 @@ +import sqlite3 +import pytest +from helpers import _f32 + + +def _shadow_tables(db, prefix): + """Return sorted list of shadow table names for a given prefix.""" + return sorted([ + row[0] for row in db.execute( + r"select name from sqlite_master where name like ? escape '\' and type='table' order by 1", + [f"{prefix}\\__%"], + ).fetchall() + ]) + + +def test_rename_ivf_no_quantizer(db): + """Rename should rename all IVF shadow tables (_ivf_centroids, _ivf_cells, + _ivf_rowid_map). quantizer=none — no _ivf_vectors table.""" + db.execute(""" + CREATE VIRTUAL TABLE v USING vec0( + a float[4] indexed by ivf(nlist=2, quantizer=none) + ) + """) + db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 4)]) + db.execute("insert into v(rowid, a) values (2, ?)", [_f32([0.9] * 4)]) + + before = _shadow_tables(db, "v") + assert "v_ivf_centroids00" in before + assert "v_ivf_cells00" in before + assert "v_ivf_rowid_map00" in before + assert "v_ivf_vectors00" not in before # quantizer=none -> no _ivf_vectors + assert "v_vector_chunks00" not in before + + db.execute("ALTER TABLE v RENAME TO v2") + + # Querying the renamed table should still work — it hits _ivf_cells, + # _ivf_centroids (when trained), and _ivf_rowid_map. + rows = db.execute( + "select rowid from v2 where a match ? and k=10", + [_f32([0.1] * 4)], + ).fetchall() + assert any(r[0] == 1 for r in rows) + + after = _shadow_tables(db, "v2") + assert "v2_ivf_centroids00" in after + assert "v2_ivf_cells00" in after + assert "v2_ivf_rowid_map00" in after + + # No old shadow tables should remain + assert _shadow_tables(db, "v") == [] + + +def test_rename_ivf_quantizer_binary(db): + """Rename should also rename _ivf_vectors when quantizer != none.""" + db.execute(""" + CREATE VIRTUAL TABLE v USING vec0( + a float[8] indexed by ivf(nlist=2, quantizer=binary) + ) + """) + db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)]) + + before = _shadow_tables(db, "v") + assert "v_ivf_centroids00" in before + assert "v_ivf_cells00" in before + assert "v_ivf_rowid_map00" in before + assert "v_ivf_vectors00" in before # quantizer=binary creates _ivf_vectors + + db.execute("ALTER TABLE v RENAME TO v2") + + rows = db.execute( + "select rowid from v2 where a match ? and k=10", + [_f32([0.1] * 8)], + ).fetchall() + assert rows[0][0] == 1 + + after = _shadow_tables(db, "v2") + assert "v2_ivf_centroids00" in after + assert "v2_ivf_cells00" in after + assert "v2_ivf_rowid_map00" in after + assert "v2_ivf_vectors00" in after + + assert _shadow_tables(db, "v") == [] + + +def test_rename_ivf_drop_after(db): + """DROP TABLE on a renamed IVF table must drop every shadow table — leftover + shadows from a half-renamed IVF index would orphan tables in the schema.""" + db.execute(""" + CREATE VIRTUAL TABLE v USING vec0( + a float[8] indexed by ivf(nlist=2, quantizer=binary) + ) + """) + db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)]) + db.execute("ALTER TABLE v RENAME TO v2") + db.execute("DROP TABLE v2") + + assert _shadow_tables(db, "v") == [] + assert _shadow_tables(db, "v2") == [] diff --git a/tests/test-rename.py b/tests/test-rename.py index 3c1007e..b0c1157 100644 --- a/tests/test-rename.py +++ b/tests/test-rename.py @@ -191,6 +191,39 @@ def test_rename_diskann(db): assert _shadow_tables(db, "v") == [] +def test_rename_rescore(db): + """Rename should work on rescore-indexed tables (no _vector_chunks shadow).""" + db.execute(""" + CREATE VIRTUAL TABLE v USING vec0( + a float[8] indexed by rescore(quantizer=bit) + ) + """) + db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)]) + db.execute("insert into v(rowid, a) values (2, ?)", [_f32([0.9] * 8)]) + + # Rescore columns use _rescore_chunks / _rescore_vectors instead of + # _vector_chunks; the rename must skip the missing _vector_chunks ALTER + # and rename both rescore shadow tables. + before = _shadow_tables(db, "v") + assert "v_rescore_chunks00" in before + assert "v_rescore_vectors00" in before + assert "v_vector_chunks00" not in before + + db.execute("ALTER TABLE v RENAME TO v2") + + rows = db.execute( + "select rowid from v2 where a match ? and k=10", + [_f32([0.1] * 8)], + ).fetchall() + assert rows[0][0] == 1 + + after = _shadow_tables(db, "v2") + assert "v2_rescore_chunks00" in after + assert "v2_rescore_vectors00" in after + assert "v2_vector_chunks00" not in after + assert _shadow_tables(db, "v") == [] + + def test_rename_drop_after(db): """DROP TABLE should work on a renamed table.""" db.execute("create virtual table v using vec0(a float[2], chunk_size=8)") From fe941716ad86363d6be99537db955305abb3ca61 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Sun, 17 May 2026 23:49:05 -0700 Subject: [PATCH 3/4] Finalize all cached vec0 stmts on commit (fixes #295) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vec0Sync only finalized stmtLatestChunk and the four stmtRowids* stmts. The IVF/DiskANN/vectors stmts persisted on the vtab until xDisconnect, which blocked sqlite3_close() (non-v2) with SQLITE_BUSY — the original mozStorage case from #295. The same leak also broke VACUUM with "SQL statements in progress" after any DiskANN operation. Switch vec0Sync to call vec0_free_resources, which already finalizes the full cache. Also fix a latent bug: the DiskANN block in vec0_free_resources was nested inside #if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE, so in the default build (DiskANN on, IVF off) those finalizes were unreachable even from xDisconnect/xDestroy. Split into two independent #if guards. Co-Authored-By: Claude Opus 4.7 (1M context) --- sqlite-vec.c | 27 ++++----------------------- tests/test-cache-finalize.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 23 deletions(-) create mode 100644 tests/test-cache-finalize.py diff --git a/sqlite-vec.c b/sqlite-vec.c index 0a33577..7af3b6a 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -3695,13 +3695,15 @@ void vec0_free_resources(vec0_vtab *p) { sqlite3_finalize(p->stmtIvfRowidMapLookup[i]); p->stmtIvfRowidMapLookup[i] = NULL; sqlite3_finalize(p->stmtIvfRowidMapDelete[i]); p->stmtIvfRowidMapDelete[i] = NULL; sqlite3_finalize(p->stmtIvfCentroidsAll[i]); p->stmtIvfCentroidsAll[i] = NULL; + } +#endif #if SQLITE_VEC_ENABLE_DISKANN + for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) { sqlite3_finalize(p->stmtDiskannNodeRead[i]); p->stmtDiskannNodeRead[i] = NULL; sqlite3_finalize(p->stmtDiskannNodeWrite[i]); p->stmtDiskannNodeWrite[i] = NULL; sqlite3_finalize(p->stmtDiskannNodeInsert[i]); p->stmtDiskannNodeInsert[i] = NULL; sqlite3_finalize(p->stmtVectorsRead[i]); p->stmtVectorsRead[i] = NULL; sqlite3_finalize(p->stmtVectorsInsert[i]); p->stmtVectorsInsert[i] = NULL; -#endif } #endif } @@ -10370,28 +10372,7 @@ static int vec0Begin(sqlite3_vtab *pVTab) { return SQLITE_OK; } static int vec0Sync(sqlite3_vtab *pVTab) { - UNUSED_PARAMETER(pVTab); - vec0_vtab *p = (vec0_vtab *)pVTab; - if (p->stmtLatestChunk) { - sqlite3_finalize(p->stmtLatestChunk); - p->stmtLatestChunk = NULL; - } - if (p->stmtRowidsInsertRowid) { - sqlite3_finalize(p->stmtRowidsInsertRowid); - p->stmtRowidsInsertRowid = NULL; - } - if (p->stmtRowidsInsertId) { - sqlite3_finalize(p->stmtRowidsInsertId); - p->stmtRowidsInsertId = NULL; - } - if (p->stmtRowidsUpdatePosition) { - sqlite3_finalize(p->stmtRowidsUpdatePosition); - p->stmtRowidsUpdatePosition = NULL; - } - if (p->stmtRowidsGetChunkPosition) { - sqlite3_finalize(p->stmtRowidsGetChunkPosition); - p->stmtRowidsGetChunkPosition = NULL; - } + vec0_free_resources((vec0_vtab *)pVTab); return SQLITE_OK; } static int vec0Commit(sqlite3_vtab *pVTab) { diff --git a/tests/test-cache-finalize.py b/tests/test-cache-finalize.py new file mode 100644 index 0000000..c81f45a --- /dev/null +++ b/tests/test-cache-finalize.py @@ -0,0 +1,34 @@ +"""Regression tests for #295: vec0 must finalize cached prepared statements +on every commit, not just the rowid subset. + +Before the fix, `vec0Sync` only finalized `stmtLatestChunk` and the four +`stmtRowids*` stmts; the DiskANN/IVF/vectors-read stmts persisted on the +vtab indefinitely. Symptom: VACUUM after any DiskANN operation failed with +"SQL statements in progress" because the cached stmts kept the connection +busy. (The same leak also caused `sqlite3_close()` non-v2 to return +SQLITE_BUSY — the original Firefox case in issue #295.) + +A separate latent bug — the DiskANN finalize block in `vec0_free_resources` +was nested inside `#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE`, so even +xDisconnect/xDestroy didn't finalize DiskANN stmts in the default build. +""" +from helpers import _f32 + + +def test_vacuum_after_diskann_inserts(db): + db.execute( + "create virtual table v using vec0(" + "a float[8] indexed by diskann(neighbor_quantizer=binary))" + ) + for i in range(1, 11): + db.execute("insert into v(rowid, a) values (?, ?)", + (i, _f32([0.1 * i] * 8))) + db.commit() + db.execute("VACUUM") + + +def test_vacuum_after_flat_inserts(db): + db.execute("create virtual table v using vec0(a float[2])") + db.execute("insert into v(rowid, a) values (1, ?)", (_f32([0.1, 0.2]),)) + db.commit() + db.execute("VACUUM") From 04d28bd21773981e2d266bbf6aa4efbd011eb4f6 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Sun, 17 May 2026 23:50:43 -0700 Subject: [PATCH 4/4] v0.1.10-alpha.4 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 99c0cc4..afd8b81 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.10-alpha.3 \ No newline at end of file +0.1.10-alpha.4