mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-06-14 15:25:18 +02:00
Compare commits
4 commits
5778fecfeb
...
04d28bd217
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
04d28bd217 | ||
|
|
fe941716ad | ||
|
|
8b81f40d1e | ||
|
|
8105eee61e |
5 changed files with 219 additions and 27 deletions
2
VERSION
2
VERSION
|
|
@@ -1 +1 @@
|
|||
0.1.10-alpha.3
|
||||
0.1.10-alpha.4
|
||||
|
|
|
|||
50
sqlite-vec.c
50
sqlite-vec.c
|
|
@@ -3695,13 +3695,15 @@ void vec0_free_resources(vec0_vtab *p) {
|
|||
sqlite3_finalize(p->stmtIvfRowidMapLookup[i]); p->stmtIvfRowidMapLookup[i] = NULL;
|
||||
sqlite3_finalize(p->stmtIvfRowidMapDelete[i]); p->stmtIvfRowidMapDelete[i] = NULL;
|
||||
sqlite3_finalize(p->stmtIvfCentroidsAll[i]); p->stmtIvfCentroidsAll[i] = NULL;
|
||||
}
|
||||
#endif
|
||||
#if SQLITE_VEC_ENABLE_DISKANN
|
||||
for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
|
||||
sqlite3_finalize(p->stmtDiskannNodeRead[i]); p->stmtDiskannNodeRead[i] = NULL;
|
||||
sqlite3_finalize(p->stmtDiskannNodeWrite[i]); p->stmtDiskannNodeWrite[i] = NULL;
|
||||
sqlite3_finalize(p->stmtDiskannNodeInsert[i]); p->stmtDiskannNodeInsert[i] = NULL;
|
||||
sqlite3_finalize(p->stmtVectorsRead[i]); p->stmtVectorsRead[i] = NULL;
|
||||
sqlite3_finalize(p->stmtVectorsInsert[i]); p->stmtVectorsInsert[i] = NULL;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
@@ -10370,28 +10372,7 @@ static int vec0Begin(sqlite3_vtab *pVTab) {
|
|||
return SQLITE_OK;
|
||||
}
|
||||
static int vec0Sync(sqlite3_vtab *pVTab) {
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
vec0_vtab *p = (vec0_vtab *)pVTab;
|
||||
if (p->stmtLatestChunk) {
|
||||
sqlite3_finalize(p->stmtLatestChunk);
|
||||
p->stmtLatestChunk = NULL;
|
||||
}
|
||||
if (p->stmtRowidsInsertRowid) {
|
||||
sqlite3_finalize(p->stmtRowidsInsertRowid);
|
||||
p->stmtRowidsInsertRowid = NULL;
|
||||
}
|
||||
if (p->stmtRowidsInsertId) {
|
||||
sqlite3_finalize(p->stmtRowidsInsertId);
|
||||
p->stmtRowidsInsertId = NULL;
|
||||
}
|
||||
if (p->stmtRowidsUpdatePosition) {
|
||||
sqlite3_finalize(p->stmtRowidsUpdatePosition);
|
||||
p->stmtRowidsUpdatePosition = NULL;
|
||||
}
|
||||
if (p->stmtRowidsGetChunkPosition) {
|
||||
sqlite3_finalize(p->stmtRowidsGetChunkPosition);
|
||||
p->stmtRowidsGetChunkPosition = NULL;
|
||||
}
|
||||
vec0_free_resources((vec0_vtab *)pVTab);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
static int vec0Commit(sqlite3_vtab *pVTab) {
|
||||
|
|
@@ -10435,9 +10416,13 @@ static int vec0Rename(sqlite3_vtab *pVtab, const char *zNew) {
|
|||
|
||||
// Per-vector-column shadow tables
|
||||
for (int i = 0; i < p->numVectorColumns; i++) {
|
||||
sqlite3_str_appendf(s,
|
||||
"ALTER TABLE \"%w\".\"%w_vector_chunks%02d\" RENAME TO \"%w_vector_chunks%02d\";",
|
||||
p->schemaName, p->tableName, i, zNew, i);
|
||||
// Non-FLAT columns (rescore, IVF, DiskANN) don't create _vector_chunks
|
||||
// (mirror the guard in vec0_init around VEC0_SHADOW_VECTOR_N_CREATE).
|
||||
if (p->vector_columns[i].index_type == VEC0_INDEX_TYPE_FLAT) {
|
||||
sqlite3_str_appendf(s,
|
||||
"ALTER TABLE \"%w\".\"%w_vector_chunks%02d\" RENAME TO \"%w_vector_chunks%02d\";",
|
||||
p->schemaName, p->tableName, i, zNew, i);
|
||||
}
|
||||
|
||||
#if SQLITE_VEC_ENABLE_RESCORE
|
||||
if (p->shadowRescoreChunksNames[i]) {
|
||||
|
|
@@ -10468,9 +10453,22 @@ static int vec0Rename(sqlite3_vtab *pVtab, const char *zNew) {
|
|||
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
|
||||
for (int i = 0; i < p->numVectorColumns; i++) {
|
||||
if (p->shadowIvfCellsNames[i]) {
|
||||
sqlite3_str_appendf(s,
|
||||
"ALTER TABLE \"%w\".\"%w_ivf_centroids%02d\" RENAME TO \"%w_ivf_centroids%02d\";",
|
||||
p->schemaName, p->tableName, i, zNew, i);
|
||||
sqlite3_str_appendf(s,
|
||||
"ALTER TABLE \"%w\".\"%w_ivf_cells%02d\" RENAME TO \"%w_ivf_cells%02d\";",
|
||||
p->schemaName, p->tableName, i, zNew, i);
|
||||
sqlite3_str_appendf(s,
|
||||
"ALTER TABLE \"%w\".\"%w_ivf_rowid_map%02d\" RENAME TO \"%w_ivf_rowid_map%02d\";",
|
||||
p->schemaName, p->tableName, i, zNew, i);
|
||||
// _ivf_vectors is only created when quantizer != none
|
||||
// (mirror ivf_create_shadow_tables in sqlite-vec-ivf.c).
|
||||
if (p->vector_columns[i].ivf.quantizer != VEC0_IVF_QUANTIZER_NONE) {
|
||||
sqlite3_str_appendf(s,
|
||||
"ALTER TABLE \"%w\".\"%w_ivf_vectors%02d\" RENAME TO \"%w_ivf_vectors%02d\";",
|
||||
p->schemaName, p->tableName, i, zNew, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
34
tests/test-cache-finalize.py
Normal file
34
tests/test-cache-finalize.py
Normal file
|
|
@@ -0,0 +1,34 @@
|
|||
"""Regression tests for #295: vec0 must finalize cached prepared statements
|
||||
on every commit, not just the rowid subset.
|
||||
|
||||
Before the fix, `vec0Sync` only finalized `stmtLatestChunk` and the four
|
||||
`stmtRowids*` stmts; the DiskANN/IVF/vectors-read stmts persisted on the
|
||||
vtab indefinitely. Symptom: VACUUM after any DiskANN operation failed with
|
||||
"SQL statements in progress" because the cached stmts kept the connection
|
||||
busy. (The same leak also caused `sqlite3_close()` non-v2 to return
|
||||
SQLITE_BUSY — the original Firefox case in issue #295.)
|
||||
|
||||
A separate latent bug — the DiskANN finalize block in `vec0_free_resources`
|
||||
was nested inside `#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE`, so even
|
||||
xDisconnect/xDestroy didn't finalize DiskANN stmts in the default build.
|
||||
"""
|
||||
from helpers import _f32
|
||||
|
||||
|
||||
def test_vacuum_after_diskann_inserts(db):
|
||||
db.execute(
|
||||
"create virtual table v using vec0("
|
||||
"a float[8] indexed by diskann(neighbor_quantizer=binary))"
|
||||
)
|
||||
for i in range(1, 11):
|
||||
db.execute("insert into v(rowid, a) values (?, ?)",
|
||||
(i, _f32([0.1 * i] * 8)))
|
||||
db.commit()
|
||||
db.execute("VACUUM")
|
||||
|
||||
|
||||
def test_vacuum_after_flat_inserts(db):
|
||||
db.execute("create virtual table v using vec0(a float[2])")
|
||||
db.execute("insert into v(rowid, a) values (1, ?)", (_f32([0.1, 0.2]),))
|
||||
db.commit()
|
||||
db.execute("VACUUM")
|
||||
98
tests/test-ivf-rename.py
Normal file
98
tests/test-ivf-rename.py
Normal file
|
|
@@ -0,0 +1,98 @@
|
|||
import sqlite3
|
||||
import pytest
|
||||
from helpers import _f32
|
||||
|
||||
|
||||
def _shadow_tables(db, prefix):
|
||||
"""Return sorted list of shadow table names for a given prefix."""
|
||||
return sorted([
|
||||
row[0] for row in db.execute(
|
||||
r"select name from sqlite_master where name like ? escape '\' and type='table' order by 1",
|
||||
[f"{prefix}\\__%"],
|
||||
).fetchall()
|
||||
])
|
||||
|
||||
|
||||
def test_rename_ivf_no_quantizer(db):
|
||||
"""Rename should rename all IVF shadow tables (_ivf_centroids, _ivf_cells,
|
||||
_ivf_rowid_map). quantizer=none — no _ivf_vectors table."""
|
||||
db.execute("""
|
||||
CREATE VIRTUAL TABLE v USING vec0(
|
||||
a float[4] indexed by ivf(nlist=2, quantizer=none)
|
||||
)
|
||||
""")
|
||||
db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 4)])
|
||||
db.execute("insert into v(rowid, a) values (2, ?)", [_f32([0.9] * 4)])
|
||||
|
||||
before = _shadow_tables(db, "v")
|
||||
assert "v_ivf_centroids00" in before
|
||||
assert "v_ivf_cells00" in before
|
||||
assert "v_ivf_rowid_map00" in before
|
||||
assert "v_ivf_vectors00" not in before # quantizer=none -> no _ivf_vectors
|
||||
assert "v_vector_chunks00" not in before
|
||||
|
||||
db.execute("ALTER TABLE v RENAME TO v2")
|
||||
|
||||
# Querying the renamed table should still work — it hits _ivf_cells,
|
||||
# _ivf_centroids (when trained), and _ivf_rowid_map.
|
||||
rows = db.execute(
|
||||
"select rowid from v2 where a match ? and k=10",
|
||||
[_f32([0.1] * 4)],
|
||||
).fetchall()
|
||||
assert any(r[0] == 1 for r in rows)
|
||||
|
||||
after = _shadow_tables(db, "v2")
|
||||
assert "v2_ivf_centroids00" in after
|
||||
assert "v2_ivf_cells00" in after
|
||||
assert "v2_ivf_rowid_map00" in after
|
||||
|
||||
# No old shadow tables should remain
|
||||
assert _shadow_tables(db, "v") == []
|
||||
|
||||
|
||||
def test_rename_ivf_quantizer_binary(db):
|
||||
"""Rename should also rename _ivf_vectors when quantizer != none."""
|
||||
db.execute("""
|
||||
CREATE VIRTUAL TABLE v USING vec0(
|
||||
a float[8] indexed by ivf(nlist=2, quantizer=binary)
|
||||
)
|
||||
""")
|
||||
db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])
|
||||
|
||||
before = _shadow_tables(db, "v")
|
||||
assert "v_ivf_centroids00" in before
|
||||
assert "v_ivf_cells00" in before
|
||||
assert "v_ivf_rowid_map00" in before
|
||||
assert "v_ivf_vectors00" in before # quantizer=binary creates _ivf_vectors
|
||||
|
||||
db.execute("ALTER TABLE v RENAME TO v2")
|
||||
|
||||
rows = db.execute(
|
||||
"select rowid from v2 where a match ? and k=10",
|
||||
[_f32([0.1] * 8)],
|
||||
).fetchall()
|
||||
assert rows[0][0] == 1
|
||||
|
||||
after = _shadow_tables(db, "v2")
|
||||
assert "v2_ivf_centroids00" in after
|
||||
assert "v2_ivf_cells00" in after
|
||||
assert "v2_ivf_rowid_map00" in after
|
||||
assert "v2_ivf_vectors00" in after
|
||||
|
||||
assert _shadow_tables(db, "v") == []
|
||||
|
||||
|
||||
def test_rename_ivf_drop_after(db):
|
||||
"""DROP TABLE on a renamed IVF table must drop every shadow table — leftover
|
||||
shadows from a half-renamed IVF index would orphan tables in the schema."""
|
||||
db.execute("""
|
||||
CREATE VIRTUAL TABLE v USING vec0(
|
||||
a float[8] indexed by ivf(nlist=2, quantizer=binary)
|
||||
)
|
||||
""")
|
||||
db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])
|
||||
db.execute("ALTER TABLE v RENAME TO v2")
|
||||
db.execute("DROP TABLE v2")
|
||||
|
||||
assert _shadow_tables(db, "v") == []
|
||||
assert _shadow_tables(db, "v2") == []
|
||||
|
|
@@ -162,6 +162,68 @@ def test_rename_with_metadata(db):
|
|||
assert _shadow_tables(db, "v") == []
|
||||
|
||||
|
||||
def test_rename_diskann(db):
|
||||
"""Rename should work on DiskANN-indexed tables (no _vector_chunks shadow)."""
|
||||
db.execute("""
|
||||
CREATE VIRTUAL TABLE v USING vec0(
|
||||
a float[8] INDEXED BY diskann(neighbor_quantizer=binary)
|
||||
)
|
||||
""")
|
||||
db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])
|
||||
|
||||
# DiskANN columns use _vectors / _diskann_nodes / _diskann_buffer instead
|
||||
# of _vector_chunks; the rename must skip the missing _vector_chunks ALTER.
|
||||
before = _shadow_tables(db, "v")
|
||||
assert "v_diskann_nodes00" in before
|
||||
assert "v_vector_chunks00" not in before
|
||||
|
||||
db.execute("ALTER TABLE v RENAME TO v2")
|
||||
|
||||
rows = db.execute(
|
||||
"select rowid from v2 where a match ? and k=10",
|
||||
[_f32([0.1] * 8)],
|
||||
).fetchall()
|
||||
assert rows[0][0] == 1
|
||||
|
||||
after = _shadow_tables(db, "v2")
|
||||
assert "v2_diskann_nodes00" in after
|
||||
assert "v2_vector_chunks00" not in after
|
||||
assert _shadow_tables(db, "v") == []
|
||||
|
||||
|
||||
def test_rename_rescore(db):
|
||||
"""Rename should work on rescore-indexed tables (no _vector_chunks shadow)."""
|
||||
db.execute("""
|
||||
CREATE VIRTUAL TABLE v USING vec0(
|
||||
a float[8] indexed by rescore(quantizer=bit)
|
||||
)
|
||||
""")
|
||||
db.execute("insert into v(rowid, a) values (1, ?)", [_f32([0.1] * 8)])
|
||||
db.execute("insert into v(rowid, a) values (2, ?)", [_f32([0.9] * 8)])
|
||||
|
||||
# Rescore columns use _rescore_chunks / _rescore_vectors instead of
|
||||
# _vector_chunks; the rename must skip the missing _vector_chunks ALTER
|
||||
# and rename both rescore shadow tables.
|
||||
before = _shadow_tables(db, "v")
|
||||
assert "v_rescore_chunks00" in before
|
||||
assert "v_rescore_vectors00" in before
|
||||
assert "v_vector_chunks00" not in before
|
||||
|
||||
db.execute("ALTER TABLE v RENAME TO v2")
|
||||
|
||||
rows = db.execute(
|
||||
"select rowid from v2 where a match ? and k=10",
|
||||
[_f32([0.1] * 8)],
|
||||
).fetchall()
|
||||
assert rows[0][0] == 1
|
||||
|
||||
after = _shadow_tables(db, "v2")
|
||||
assert "v2_rescore_chunks00" in after
|
||||
assert "v2_rescore_vectors00" in after
|
||||
assert "v2_vector_chunks00" not in after
|
||||
assert _shadow_tables(db, "v") == []
|
||||
|
||||
|
||||
def test_rename_drop_after(db):
|
||||
"""DROP TABLE should work on a renamed table."""
|
||||
db.execute("create virtual table v using vec0(a float[2], chunk_size=8)")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue