Add FTS5-style command column and runtime oversample for rescore

Replace the old INSERT INTO t(rowid) VALUES('command') hack with a
proper hidden command column named after the table (FTS5 pattern):

  INSERT INTO t(t) VALUES ('oversample=16')

The command column is the first hidden column (before distance and k),
which leaves room to use it as a table-valued function argument in the future.

Schema: CREATE TABLE x(rowid, <cols>, "<table>" hidden, distance hidden, k hidden)

For backwards compatibility, pre-v0.1.10 tables (detected via the _info
shadow table version) skip the command column to avoid name conflicts
with user columns that may share the table's name. Verified with a legacy
fixture DB generated by sqlite-vec v0.1.6.

Changes:
- Add hidden command column to sqlite3_declare_vtab for new tables
- Version-gate via _info shadow table for existing tables
- Validate at CREATE time that no column name matches table name
- Add rescore_handle_command() with oversample=N support
- rescore_knn() prefers runtime oversample_search over CREATE default
- Remove old rowid-based command dispatch
- Migrate all DiskANN/IVF/fuzz tests and benchmarks to new syntax
- Add legacy DB fixture (v0.1.6) and 9 backwards-compat tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-31 22:39:18 -07:00
parent b7fc459be4
commit 6e2c4c6bab
21 changed files with 512 additions and 105 deletions

138
tests/test-legacy-compat.py Normal file
View file

@ -0,0 +1,138 @@
"""Backwards compatibility tests: current sqlite-vec reading legacy databases.
The fixture file tests/fixtures/legacy-v0.1.6.db was generated by
tests/generate_legacy_db.py using sqlite-vec v0.1.6. These tests verify
that the current version can fully read, query, insert into, and delete
from tables created by older versions.
"""
import sqlite3
import struct
import os
import shutil
import pytest
# Path to the legacy fixture database, resolved relative to this test module.
_TESTS_DIR = os.path.dirname(__file__)
FIXTURE_PATH = os.path.join(_TESTS_DIR, "fixtures", "legacy-v0.1.6.db")
def _f32(vals):
    """Serialize a sequence of numbers into packed float32 bytes.

    Each element is packed with ``struct``'s ``f`` format code in the
    platform's native byte order; an empty sequence yields ``b""``.
    """
    fmt = "%df" % len(vals)
    return struct.pack(fmt, *vals)
@pytest.fixture()
def legacy_db(tmp_path):
    """Yield a connection to a private, writable copy of the legacy fixture.

    The fixture database is copied into ``tmp_path`` so tests can modify it
    without touching the checked-in file. The connection uses
    ``sqlite3.Row`` rows and has the ``dist/vec0`` extension loaded.

    The test is skipped when the fixture file is missing. The connection is
    closed once the test finishes, and also if loading the extension fails,
    so no handle on the temporary database file is left open.
    """
    if not os.path.exists(FIXTURE_PATH):
        pytest.skip("Legacy fixture not found — run: uv run --script tests/generate_legacy_db.py")

    db_path = str(tmp_path / "legacy.db")
    shutil.copy2(FIXTURE_PATH, db_path)

    db = sqlite3.connect(db_path)
    try:
        db.row_factory = sqlite3.Row
        db.enable_load_extension(True)
        db.load_extension("dist/vec0")
        yield db
    finally:
        # Teardown: release the file handle whether the test passed or failed.
        db.close()
def test_legacy_select_count(legacy_db):
    """Counting rows in the legacy table reports all 50 rows."""
    cursor = legacy_db.execute("SELECT count(*) FROM legacy_vectors")
    result = cursor.fetchone()
    assert result[0] == 50
def test_legacy_point_query(legacy_db):
    """Looking up rowid 1 returns that row together with its stored vector."""
    sql = "SELECT rowid, emb FROM legacy_vectors WHERE rowid = 1"
    hit = legacy_db.execute(sql).fetchone()
    assert hit["rowid"] == 1

    # The blob is decoded as four packed floats; only the first is checked.
    components = struct.unpack("4f", hit["emb"])
    assert components[0] == pytest.approx(1.0)
def test_legacy_knn(legacy_db):
    """A k=5 KNN query returns five rows ordered by distance, led by rowid 1."""
    sql = (
        "SELECT rowid, distance FROM legacy_vectors "
        "WHERE emb MATCH ? AND k = 5"
    )
    target = _f32([1.0, 0.0, 0.0, 0.0])
    hits = legacy_db.execute(sql, [target]).fetchall()
    assert len(hits) == 5

    nearest = hits[0]
    assert nearest["rowid"] == 1
    assert nearest["distance"] == pytest.approx(0.0)

    # Distances must be non-decreasing from each result to the next one.
    for current, following in zip(hits, hits[1:]):
        assert current["distance"] <= following["distance"]
def test_legacy_insert(legacy_db):
    """A row inserted into the legacy table is counted and found by KNN."""
    new_vec = _f32([100.0, 0.0, 0.0, 0.0])
    legacy_db.execute(
        "INSERT INTO legacy_vectors(rowid, emb) VALUES (100, ?)",
        [new_vec],
    )

    total = legacy_db.execute("SELECT count(*) FROM legacy_vectors").fetchone()[0]
    assert total == 51

    # Searching with the vector just inserted must return rowid 100 first.
    nearest = legacy_db.execute(
        "SELECT rowid FROM legacy_vectors WHERE emb MATCH ? AND k = 1",
        [new_vec],
    ).fetchall()
    assert nearest[0]["rowid"] == 100
def test_legacy_delete(legacy_db):
    """Deleting rowid 1 lowers the count and removes it from KNN results."""
    legacy_db.execute("DELETE FROM legacy_vectors WHERE rowid = 1")

    remaining = legacy_db.execute("SELECT count(*) FROM legacy_vectors").fetchone()[0]
    assert remaining == 49

    # Query with the same vector the other tests use to find rowid 1.
    target = _f32([1.0, 0.0, 0.0, 0.0])
    hits = legacy_db.execute(
        "SELECT rowid FROM legacy_vectors WHERE emb MATCH ? AND k = 5",
        [target],
    ).fetchall()
    returned_ids = {hit["rowid"] for hit in hits}
    assert 1 not in returned_ids
def test_legacy_fullscan(legacy_db):
    """An ordered scan without a vector constraint returns rowids 1 through 5."""
    sql = "SELECT rowid FROM legacy_vectors ORDER BY rowid LIMIT 5"
    scanned = legacy_db.execute(sql).fetchall()
    first_ids = [record["rowid"] for record in scanned]
    assert first_ids == [1, 2, 3, 4, 5]
def test_legacy_name_conflict_table(legacy_db):
    """A legacy table whose column shares the table's name stays queryable.

    The v0.1.6 DB has: CREATE VIRTUAL TABLE emb USING vec0(emb float[4])
    Current code should NOT add the command column for this table
    (detected via _info version check), avoiding the name conflict.
    """
    total = legacy_db.execute("SELECT count(*) FROM emb").fetchone()[0]
    assert total == 10

    target = _f32([1.0, 0.0, 0.0, 0.0])
    hits = legacy_db.execute(
        "SELECT rowid, distance FROM emb WHERE emb MATCH ? AND k = 3",
        [target],
    ).fetchall()
    assert len(hits) == 3
    assert hits[0]["rowid"] == 1
def test_legacy_name_conflict_insert_delete(legacy_db):
    """Inserting into and deleting from the legacy name-conflict table works."""
    count_sql = "SELECT count(*) FROM emb"

    new_vec = _f32([100.0, 0.0, 0.0, 0.0])
    legacy_db.execute(
        "INSERT INTO emb(rowid, emb) VALUES (100, ?)",
        [new_vec],
    )
    after_insert = legacy_db.execute(count_sql).fetchone()[0]
    assert after_insert == 11

    legacy_db.execute("DELETE FROM emb WHERE rowid = 5")
    after_delete = legacy_db.execute(count_sql).fetchone()[0]
    assert after_delete == 10
def test_legacy_no_command_column(legacy_db):
    """Command-style INSERTs against a legacy table are rejected.

    Legacy tables should NOT have the command column, so an INSERT that
    names the table as a column must raise ``sqlite3.OperationalError``.
    """
    command_sql = "INSERT INTO legacy_vectors(legacy_vectors) VALUES ('some_command')"
    with pytest.raises(sqlite3.OperationalError):
        legacy_db.execute(command_sql)