mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-26 01:06:27 +02:00
Add FTS5-style command column and runtime oversample for rescore
Replace the old INSERT INTO t(rowid) VALUES('command') hack with a
proper hidden command column named after the table (FTS5 pattern):
INSERT INTO t(t) VALUES ('oversample=16')
The command column is the first hidden column (before distance and k)
to preserve the option of using table-valued function arguments in the future.
Schema: CREATE TABLE x(rowid, <cols>, "<table>" hidden, distance hidden, k hidden)
For backwards compat, pre-v0.1.10 tables (detected via _info shadow
table version) skip the command column to avoid name conflicts with
user columns that may share the table's name. Verified with legacy
fixture DB generated by sqlite-vec v0.1.6.
Changes:
- Add hidden command column to sqlite3_declare_vtab for new tables
- Version-gate via _info shadow table for existing tables
- Validate at CREATE time that no column name matches table name
- Add rescore_handle_command() with oversample=N support
- rescore_knn() prefers runtime oversample_search over CREATE default
- Remove old rowid-based command dispatch
- Migrate all DiskANN/IVF/fuzz tests and benchmarks to new syntax
- Add legacy DB fixture (v0.1.6) and 9 backwards-compat tests
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b7fc459be4
commit
6e2c4c6bab
21 changed files with 512 additions and 105 deletions
138
tests/test-legacy-compat.py
Normal file
138
tests/test-legacy-compat.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
"""Backwards compatibility tests: current sqlite-vec reading legacy databases.
|
||||
|
||||
The fixture file tests/fixtures/legacy-v0.1.6.db was generated by
|
||||
tests/generate_legacy_db.py using sqlite-vec v0.1.6. These tests verify
|
||||
that the current version can fully read, query, insert into, and delete
|
||||
from tables created by older versions.
|
||||
"""
|
||||
import sqlite3
|
||||
import struct
|
||||
import os
|
||||
import shutil
|
||||
import pytest
|
||||
|
||||
FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "legacy-v0.1.6.db")
|
||||
|
||||
|
||||
def _f32(vals):
    """Serialize a sequence of numbers as a packed float32 blob.

    The format string carries no byte-order prefix, so ``struct`` uses the
    platform's native byte order. Each value occupies four bytes. The tests
    in this module bind the result as a vector parameter in SQL statements.
    """
    layout = "%df" % len(vals)
    return struct.pack(layout, *vals)
|
||||
|
||||
|
||||
@pytest.fixture()
def legacy_db(tmp_path):
    """Yield a connection to a throwaway copy of the legacy fixture database.

    The fixture file is copied into ``tmp_path`` so tests may modify it
    freely. The connection uses ``sqlite3.Row`` rows and has the extension
    at ``dist/vec0`` loaded. If the fixture file is missing, the requesting
    test is skipped.

    The connection is closed during teardown (and also if loading the
    extension fails) so the database handle is not leaked between tests.
    """
    if not os.path.exists(FIXTURE_PATH):
        pytest.skip("Legacy fixture not found — run: uv run --script tests/generate_legacy_db.py")

    db_path = str(tmp_path / "legacy.db")
    shutil.copy2(FIXTURE_PATH, db_path)

    db = sqlite3.connect(db_path)
    try:
        db.row_factory = sqlite3.Row
        db.enable_load_extension(True)
        db.load_extension("dist/vec0")
        yield db
    finally:
        # Runs on fixture teardown as well as on setup failure.
        db.close()
|
||||
|
||||
|
||||
def test_legacy_select_count(legacy_db):
    """count(*) over the legacy table reports all 50 rows."""
    cursor = legacy_db.execute("SELECT count(*) FROM legacy_vectors")
    first_row = cursor.fetchone()
    assert first_row[0] == 50
|
||||
|
||||
|
||||
def test_legacy_point_query(legacy_db):
    """Looking up rowid 1 returns that row with the expected vector."""
    sql = "SELECT rowid, emb FROM legacy_vectors WHERE rowid = 1"
    hit = legacy_db.execute(sql).fetchone()
    assert hit["rowid"] == 1

    # The blob is decoded as four native float32 values; only the first
    # component is checked.
    components = struct.unpack("4f", hit["emb"])
    assert components[0] == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_legacy_knn(legacy_db):
    """A k=5 KNN query on the legacy table returns five ordered neighbours.

    The nearest neighbour must be rowid 1 at distance ~0, and distances
    must be non-decreasing across the result set.
    """
    sql = (
        "SELECT rowid, distance FROM legacy_vectors "
        "WHERE emb MATCH ? AND k = 5"
    )
    target = _f32([1.0, 0.0, 0.0, 0.0])
    neighbours = legacy_db.execute(sql, [target]).fetchall()

    assert len(neighbours) == 5

    nearest = neighbours[0]
    assert nearest["rowid"] == 1
    assert nearest["distance"] == pytest.approx(0.0)

    # Compare each distance with its successor.
    distances = [n["distance"] for n in neighbours]
    for current, following in zip(distances, distances[1:]):
        assert current <= following
|
||||
|
||||
|
||||
def test_legacy_insert(legacy_db):
    """A row inserted into the legacy table is counted and found by KNN."""
    new_vector = _f32([100.0, 0.0, 0.0, 0.0])

    legacy_db.execute(
        "INSERT INTO legacy_vectors(rowid, emb) VALUES (100, ?)",
        [new_vector],
    )

    total = legacy_db.execute("SELECT count(*) FROM legacy_vectors").fetchone()[0]
    assert total == 51

    # Querying with the identical vector should surface the new row first.
    knn_sql = "SELECT rowid FROM legacy_vectors WHERE emb MATCH ? AND k = 1"
    matches = legacy_db.execute(knn_sql, [new_vector]).fetchall()
    assert matches[0]["rowid"] == 100
|
||||
|
||||
|
||||
def test_legacy_delete(legacy_db):
    """A deleted row no longer counts and no longer appears in KNN results."""
    legacy_db.execute("DELETE FROM legacy_vectors WHERE rowid = 1")

    remaining = legacy_db.execute("SELECT count(*) FROM legacy_vectors").fetchone()[0]
    assert remaining == 49

    knn_sql = "SELECT rowid FROM legacy_vectors WHERE emb MATCH ? AND k = 5"
    neighbours = legacy_db.execute(knn_sql, [_f32([1.0, 0.0, 0.0, 0.0])]).fetchall()
    returned_ids = [n["rowid"] for n in neighbours]
    assert 1 not in returned_ids
|
||||
|
||||
|
||||
def test_legacy_fullscan(legacy_db):
    """An ordered full scan with LIMIT 5 yields rowids 1 through 5."""
    scan_sql = "SELECT rowid FROM legacy_vectors ORDER BY rowid LIMIT 5"
    scanned_ids = [r["rowid"] for r in legacy_db.execute(scan_sql).fetchall()]
    assert scanned_ids == [1, 2, 3, 4, 5]
|
||||
|
||||
|
||||
def test_legacy_name_conflict_table(legacy_db):
    """A legacy table whose column shares the table's name stays usable.

    The v0.1.6 DB has: CREATE VIRTUAL TABLE emb USING vec0(emb float[4])
    Current code should NOT add the command column for this table
    (detected via _info version check), avoiding the name conflict.
    """
    total = legacy_db.execute("SELECT count(*) FROM emb").fetchone()[0]
    assert total == 10

    knn_sql = "SELECT rowid, distance FROM emb WHERE emb MATCH ? AND k = 3"
    neighbours = legacy_db.execute(knn_sql, [_f32([1.0, 0.0, 0.0, 0.0])]).fetchall()
    assert len(neighbours) == 3
    assert neighbours[0]["rowid"] == 1
|
||||
|
||||
|
||||
def test_legacy_name_conflict_insert_delete(legacy_db):
    """INSERT then DELETE on the legacy name-conflict table adjust the count."""
    count_sql = "SELECT count(*) FROM emb"

    legacy_db.execute(
        "INSERT INTO emb(rowid, emb) VALUES (100, ?)",
        [_f32([100.0, 0.0, 0.0, 0.0])],
    )
    after_insert = legacy_db.execute(count_sql).fetchone()[0]
    assert after_insert == 11

    legacy_db.execute("DELETE FROM emb WHERE rowid = 5")
    after_delete = legacy_db.execute(count_sql).fetchone()[0]
    assert after_delete == 10
|
||||
|
||||
|
||||
def test_legacy_no_command_column(legacy_db):
    """Addressing a command column on a legacy table raises OperationalError."""
    command_sql = "INSERT INTO legacy_vectors(legacy_vectors) VALUES ('some_command')"
    with pytest.raises(sqlite3.OperationalError):
        legacy_db.execute(command_sql)
|
||||
Loading…
Add table
Add a link
Reference in a new issue