mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Enable auxiliary columns for rescore, IVF, and DiskANN indexes
The constructor previously rejected auxiliary columns (+col) for all non-flat index types. Analysis confirms that all code paths already handle aux columns correctly — aux data lives in the _auxiliary shadow table, independent of the vector index structures. Remove the three auxiliary column guards. Metadata and partition key guards remain in place (separate analysis needed). Adds 8 tests (mostly snapshot-based) covering shadow table creation, insert+KNN returning aux values, aux UPDATE, and aux DELETE cleanup for both rescore and DiskANN, plus DROP TABLE cleanup for DiskANN. IVF aux verified with IVF-enabled build. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
01b4b2a965
commit
b7fc459be4
6 changed files with 597 additions and 37 deletions
16
sqlite-vec.c
16
sqlite-vec.c
|
|
@ -5149,11 +5149,6 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (hasRescore) {
|
if (hasRescore) {
|
||||||
if (numAuxiliaryColumns > 0) {
|
|
||||||
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
|
||||||
"Auxiliary columns are not supported with rescore indexes");
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
if (numMetadataColumns > 0) {
|
if (numMetadataColumns > 0) {
|
||||||
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
||||||
"Metadata columns are not supported with rescore indexes");
|
"Metadata columns are not supported with rescore indexes");
|
||||||
|
|
@ -5183,11 +5178,6 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
||||||
"partition key columns are not supported with IVF indexes");
|
"partition key columns are not supported with IVF indexes");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (numAuxiliaryColumns > 0) {
|
|
||||||
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
|
||||||
"auxiliary columns are not supported with IVF indexes");
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
if (numMetadataColumns > 0) {
|
if (numMetadataColumns > 0) {
|
||||||
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
||||||
"metadata columns are not supported with IVF indexes");
|
"metadata columns are not supported with IVF indexes");
|
||||||
|
|
@ -5199,12 +5189,6 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
||||||
// DiskANN columns cannot coexist with aux/metadata/partition columns
|
// DiskANN columns cannot coexist with metadata/partition columns (auxiliary columns are allowed)
|
||||||
for (int i = 0; i < numVectorColumns; i++) {
|
for (int i = 0; i < numVectorColumns; i++) {
|
||||||
if (pNew->vector_columns[i].index_type == VEC0_INDEX_TYPE_DISKANN) {
|
if (pNew->vector_columns[i].index_type == VEC0_INDEX_TYPE_DISKANN) {
|
||||||
if (numAuxiliaryColumns > 0) {
|
|
||||||
*pzErr = sqlite3_mprintf(
|
|
||||||
VEC_CONSTRUCTOR_ERROR
|
|
||||||
"Auxiliary columns are not supported with DiskANN-indexed vector columns");
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
if (numMetadataColumns > 0) {
|
if (numMetadataColumns > 0) {
|
||||||
*pzErr = sqlite3_mprintf(
|
*pzErr = sqlite3_mprintf(
|
||||||
VEC_CONSTRUCTOR_ERROR
|
VEC_CONSTRUCTOR_ERROR
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -1,5 +1,7 @@
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from helpers import exec, vec0_shadow_table_contents
|
import struct
|
||||||
|
import pytest
|
||||||
|
from helpers import exec, vec0_shadow_table_contents, _f32
|
||||||
|
|
||||||
|
|
||||||
def test_constructor_limit(db, snapshot):
|
def test_constructor_limit(db, snapshot):
|
||||||
|
|
@ -126,3 +128,198 @@ def test_knn(db, snapshot):
|
||||||
) == snapshot(name="illegal KNN w/ aux")
|
) == snapshot(name="illegal KNN w/ aux")
|
||||||
|
|
||||||
|
|
||||||
|
# ======================================================================
|
||||||
|
# Auxiliary columns with non-flat indexes
|
||||||
|
# ======================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def test_rescore_aux_shadow_tables(db, snapshot):
|
||||||
|
"""Rescore + aux column: verify shadow tables are created correctly."""
|
||||||
|
db.execute(
|
||||||
|
"CREATE VIRTUAL TABLE t USING vec0("
|
||||||
|
" emb float[128] indexed by rescore(quantizer=bit),"
|
||||||
|
" +label text,"
|
||||||
|
" +score float"
|
||||||
|
")"
|
||||||
|
)
|
||||||
|
assert exec(db, "SELECT name, sql FROM sqlite_master WHERE type='table' AND name LIKE 't_%' ORDER BY name") == snapshot(
|
||||||
|
name="rescore aux shadow tables"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_rescore_aux_insert_knn(db, snapshot):
|
||||||
|
"""Insert with aux data, KNN should return aux column values."""
|
||||||
|
db.execute(
|
||||||
|
"CREATE VIRTUAL TABLE t USING vec0("
|
||||||
|
" emb float[128] indexed by rescore(quantizer=bit),"
|
||||||
|
" +label text"
|
||||||
|
")"
|
||||||
|
)
|
||||||
|
import random
|
||||||
|
random.seed(77)
|
||||||
|
data = [
|
||||||
|
("alpha", [random.gauss(0, 1) for _ in range(128)]),
|
||||||
|
("beta", [random.gauss(0, 1) for _ in range(128)]),
|
||||||
|
("gamma", [random.gauss(0, 1) for _ in range(128)]),
|
||||||
|
]
|
||||||
|
for label, vec in data:
|
||||||
|
db.execute(
|
||||||
|
"INSERT INTO t(emb, label) VALUES (?, ?)",
|
||||||
|
[_f32(vec), label],
|
||||||
|
)
|
||||||
|
|
||||||
|
assert exec(db, "SELECT rowid, label FROM t ORDER BY rowid") == snapshot(
|
||||||
|
name="rescore aux select all"
|
||||||
|
)
|
||||||
|
assert vec0_shadow_table_contents(db, "t", skip_info=True) == snapshot(
|
||||||
|
name="rescore aux shadow contents"
|
||||||
|
)
|
||||||
|
|
||||||
|
# KNN should include aux column, "alpha" closest to its own vector
|
||||||
|
rows = db.execute(
|
||||||
|
"SELECT label, distance FROM t WHERE emb MATCH ? ORDER BY distance LIMIT 3",
|
||||||
|
[_f32(data[0][1])],
|
||||||
|
).fetchall()
|
||||||
|
assert len(rows) == 3
|
||||||
|
assert rows[0][0] == "alpha"
|
||||||
|
|
||||||
|
|
||||||
|
def test_rescore_aux_update(db):
|
||||||
|
"""UPDATE aux column on rescore table should work without affecting vectors."""
|
||||||
|
db.execute(
|
||||||
|
"CREATE VIRTUAL TABLE t USING vec0("
|
||||||
|
" emb float[128] indexed by rescore(quantizer=bit),"
|
||||||
|
" +label text"
|
||||||
|
")"
|
||||||
|
)
|
||||||
|
import random
|
||||||
|
random.seed(88)
|
||||||
|
vec = [random.gauss(0, 1) for _ in range(128)]
|
||||||
|
db.execute("INSERT INTO t(rowid, emb, label) VALUES (1, ?, 'original')", [_f32(vec)])
|
||||||
|
db.execute("UPDATE t SET label = 'updated' WHERE rowid = 1")
|
||||||
|
|
||||||
|
assert db.execute("SELECT label FROM t WHERE rowid = 1").fetchone()[0] == "updated"
|
||||||
|
|
||||||
|
# KNN still works with updated aux
|
||||||
|
rows = db.execute(
|
||||||
|
"SELECT rowid, label FROM t WHERE emb MATCH ? ORDER BY distance LIMIT 1",
|
||||||
|
[_f32(vec)],
|
||||||
|
).fetchall()
|
||||||
|
assert rows[0][0] == 1
|
||||||
|
assert rows[0][1] == "updated"
|
||||||
|
|
||||||
|
|
||||||
|
def test_rescore_aux_delete(db, snapshot):
|
||||||
|
"""DELETE should remove aux data from shadow table."""
|
||||||
|
db.execute(
|
||||||
|
"CREATE VIRTUAL TABLE t USING vec0("
|
||||||
|
" emb float[128] indexed by rescore(quantizer=bit),"
|
||||||
|
" +label text"
|
||||||
|
")"
|
||||||
|
)
|
||||||
|
import random
|
||||||
|
random.seed(99)
|
||||||
|
for i in range(5):
|
||||||
|
db.execute(
|
||||||
|
"INSERT INTO t(rowid, emb, label) VALUES (?, ?, ?)",
|
||||||
|
[i + 1, _f32([random.gauss(0, 1) for _ in range(128)]), f"item-{i+1}"],
|
||||||
|
)
|
||||||
|
|
||||||
|
db.execute("DELETE FROM t WHERE rowid = 3")
|
||||||
|
|
||||||
|
assert exec(db, "SELECT rowid, label FROM t ORDER BY rowid") == snapshot(
|
||||||
|
name="rescore aux after delete"
|
||||||
|
)
|
||||||
|
assert exec(db, "SELECT rowid, value00 FROM t_auxiliary ORDER BY rowid") == snapshot(
|
||||||
|
name="rescore aux shadow after delete"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_diskann_aux_shadow_tables(db, snapshot):
|
||||||
|
"""DiskANN + aux column: verify shadow tables are created correctly."""
|
||||||
|
db.execute("""
|
||||||
|
CREATE VIRTUAL TABLE t USING vec0(
|
||||||
|
emb float[8] INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=8),
|
||||||
|
+label text,
|
||||||
|
+score float
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
assert exec(db, "SELECT name, sql FROM sqlite_master WHERE type='table' AND name LIKE 't_%' ORDER BY name") == snapshot(
|
||||||
|
name="diskann aux shadow tables"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_diskann_aux_insert_knn(db, snapshot):
|
||||||
|
"""DiskANN + aux: insert, KNN, verify aux values returned."""
|
||||||
|
db.execute("""
|
||||||
|
CREATE VIRTUAL TABLE t USING vec0(
|
||||||
|
emb float[8] INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=8),
|
||||||
|
+label text
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
data = [
|
||||||
|
("red", [1, 0, 0, 0, 0, 0, 0, 0]),
|
||||||
|
("green", [0, 1, 0, 0, 0, 0, 0, 0]),
|
||||||
|
("blue", [0, 0, 1, 0, 0, 0, 0, 0]),
|
||||||
|
]
|
||||||
|
for label, vec in data:
|
||||||
|
db.execute("INSERT INTO t(emb, label) VALUES (?, ?)", [_f32(vec), label])
|
||||||
|
|
||||||
|
assert exec(db, "SELECT rowid, label FROM t ORDER BY rowid") == snapshot(
|
||||||
|
name="diskann aux select all"
|
||||||
|
)
|
||||||
|
assert vec0_shadow_table_contents(db, "t", skip_info=True) == snapshot(
|
||||||
|
name="diskann aux shadow contents"
|
||||||
|
)
|
||||||
|
|
||||||
|
rows = db.execute(
|
||||||
|
"SELECT label, distance FROM t WHERE emb MATCH ? AND k = 3",
|
||||||
|
[_f32([1, 0, 0, 0, 0, 0, 0, 0])],
|
||||||
|
).fetchall()
|
||||||
|
assert len(rows) >= 1
|
||||||
|
assert rows[0][0] == "red"
|
||||||
|
|
||||||
|
|
||||||
|
def test_diskann_aux_update_and_delete(db, snapshot):
|
||||||
|
"""DiskANN + aux: update aux column, delete row, verify cleanup."""
|
||||||
|
db.execute("""
|
||||||
|
CREATE VIRTUAL TABLE t USING vec0(
|
||||||
|
emb float[8] INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=8),
|
||||||
|
+label text
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
for i in range(5):
|
||||||
|
vec = [0.0] * 8
|
||||||
|
vec[i % 8] = 1.0
|
||||||
|
db.execute(
|
||||||
|
"INSERT INTO t(rowid, emb, label) VALUES (?, ?, ?)",
|
||||||
|
[i + 1, _f32(vec), f"item-{i+1}"],
|
||||||
|
)
|
||||||
|
|
||||||
|
db.execute("UPDATE t SET label = 'UPDATED' WHERE rowid = 2")
|
||||||
|
db.execute("DELETE FROM t WHERE rowid = 3")
|
||||||
|
|
||||||
|
assert exec(db, "SELECT rowid, label FROM t ORDER BY rowid") == snapshot(
|
||||||
|
name="diskann aux after update+delete"
|
||||||
|
)
|
||||||
|
assert exec(db, "SELECT rowid, value00 FROM t_auxiliary ORDER BY rowid") == snapshot(
|
||||||
|
name="diskann aux shadow after update+delete"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_diskann_aux_drop_cleans_all(db):
|
||||||
|
"""DROP TABLE should remove aux shadow table too."""
|
||||||
|
db.execute("""
|
||||||
|
CREATE VIRTUAL TABLE t USING vec0(
|
||||||
|
emb float[8] INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=8),
|
||||||
|
+label text
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
db.execute("INSERT INTO t(emb, label) VALUES (?, 'test')", [_f32([1]*8)])
|
||||||
|
db.execute("DROP TABLE t")
|
||||||
|
|
||||||
|
tables = [r[0] for r in db.execute(
|
||||||
|
"SELECT name FROM sqlite_master WHERE name LIKE 't_%'"
|
||||||
|
).fetchall()]
|
||||||
|
assert "t_auxiliary" not in tables
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -630,16 +630,19 @@ def test_diskann_command_search_list_size_error(db):
|
||||||
# Error cases: DiskANN + auxiliary/metadata/partition columns
|
# DiskANN + auxiliary/metadata/partition columns (auxiliary columns are supported; the others remain error cases)
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
|
|
||||||
def test_diskann_create_error_with_auxiliary_column(db):
|
def test_diskann_create_with_auxiliary_column(db):
|
||||||
"""DiskANN tables should not support auxiliary columns."""
|
"""DiskANN tables should support auxiliary columns."""
|
||||||
result = exec(db, """
|
db.execute("""
|
||||||
CREATE VIRTUAL TABLE t USING vec0(
|
CREATE VIRTUAL TABLE t USING vec0(
|
||||||
emb float[64] INDEXED BY diskann(neighbor_quantizer=binary),
|
emb float[64] INDEXED BY diskann(neighbor_quantizer=binary),
|
||||||
+extra text
|
+extra text
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
assert "error" in result
|
# Auxiliary shadow table should exist
|
||||||
assert "auxiliary" in result["message"].lower() or "Auxiliary" in result["message"]
|
tables = [r[0] for r in db.execute(
|
||||||
|
"SELECT name FROM sqlite_master WHERE name LIKE 't_%' ORDER BY 1"
|
||||||
|
).fetchall()]
|
||||||
|
assert "t_auxiliary" in tables
|
||||||
|
|
||||||
|
|
||||||
def test_diskann_create_error_with_metadata_column(db):
|
def test_diskann_create_error_with_metadata_column(db):
|
||||||
|
|
|
||||||
|
|
@ -203,13 +203,15 @@ def test_update_vector_via_delete_insert(db):
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
def test_error_ivf_with_auxiliary_column(db):
|
def test_ivf_with_auxiliary_column(db):
|
||||||
result = exec(
|
"""IVF should support auxiliary columns."""
|
||||||
db,
|
db.execute(
|
||||||
"CREATE VIRTUAL TABLE t USING vec0(v float[4] indexed by ivf(), +extra text)",
|
"CREATE VIRTUAL TABLE t USING vec0(v float[4] indexed by ivf(), +extra text)"
|
||||||
)
|
)
|
||||||
assert "error" in result
|
tables = [r[0] for r in db.execute(
|
||||||
assert "auxiliary" in result.get("message", "").lower()
|
"SELECT name FROM sqlite_master WHERE name LIKE 't_%' ORDER BY 1"
|
||||||
|
).fetchall()]
|
||||||
|
assert "t_auxiliary" in tables
|
||||||
|
|
||||||
|
|
||||||
def test_error_ivf_with_metadata_column(db):
|
def test_error_ivf_with_metadata_column(db):
|
||||||
|
|
|
||||||
|
|
@ -32,15 +32,18 @@ def unpack_float_vec(blob):
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
def test_create_error_with_aux_column(db):
|
def test_create_with_aux_column(db):
|
||||||
"""Rescore should reject auxiliary columns."""
|
"""Rescore should support auxiliary columns."""
|
||||||
with pytest.raises(sqlite3.OperationalError, match="Auxiliary columns"):
|
|
||||||
db.execute(
|
db.execute(
|
||||||
"CREATE VIRTUAL TABLE t USING vec0("
|
"CREATE VIRTUAL TABLE t USING vec0("
|
||||||
" embedding float[8] indexed by rescore(quantizer=bit),"
|
" embedding float[128] indexed by rescore(quantizer=bit),"
|
||||||
" +extra text"
|
" +extra text"
|
||||||
")"
|
")"
|
||||||
)
|
)
|
||||||
|
tables = [r[0] for r in db.execute(
|
||||||
|
"SELECT name FROM sqlite_master WHERE name LIKE 't_%' ORDER BY 1"
|
||||||
|
).fetchall()]
|
||||||
|
assert "t_auxiliary" in tables
|
||||||
|
|
||||||
|
|
||||||
def test_create_error_with_metadata_column(db):
|
def test_create_error_with_metadata_column(db):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue