test-metadata.py

This commit is contained in:
Alex Garcia 2024-11-13 17:54:10 -08:00
parent 9cba8ffe8c
commit 3e6759cbfc
5 changed files with 480 additions and 5 deletions

View file

@ -52,3 +52,14 @@ The fourth character of the block is a `_` filler.
`argv[i]` is the value of the rowid or id to match against for the point query. `argv[i]` is the value of the rowid or id to match against for the point query.
The remaining 3 characters of the block are `_` fillers. The remaining 3 characters of the block are `_` fillers.
#### `VEC0_IDXSTR_KIND_METADATA_CONSTRAINT` (`'&'`)
`argv[i]` is the value of the `WHERE` constraint for a metadata column in a KNN query.
The second character of the block denotes which metadata column the constraint belongs to, using `A` to denote the first metadata column, `B` for the second, etc. It is encoded with `'A' + metadata_idx` and can be decoded with `c - 'A'`.
The third character of the block is the constraint operator. It will be one of `enum vec0_metadata_operator`, as only a subset of operators are supported on metadata column KNN filters.
The fourth character of the block is a `_` filler.

7
TODO
View file

@ -18,6 +18,7 @@
- DELETE and UPDATE support - DELETE and UPDATE support
- large strings - large strings
- date/datetime - date/datetime
- `v in (...)` handling - later
- remaining TODO items - `v in (...)` handling
- dictionary encoding? - remaining TODO items
- dictionary encoding?

View file

@ -4074,7 +4074,12 @@ int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_i
break; break;
} }
case VEC0_METADATA_COLUMN_KIND_INT64: { case VEC0_METADATA_COLUMN_KIND_INT64: {
// TODO handle int64 values i64 value;
rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
if(rc != SQLITE_OK) {
goto done;
}
sqlite3_result_int64(context, value);
break; break;
} }
case VEC0_METADATA_COLUMN_KIND_FLOAT: { case VEC0_METADATA_COLUMN_KIND_FLOAT: {
@ -4087,7 +4092,13 @@ int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_i
break; break;
} }
case VEC0_METADATA_COLUMN_KIND_DOUBLE: { case VEC0_METADATA_COLUMN_KIND_DOUBLE: {
// TODO handle double values double value;
rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
if(rc != SQLITE_OK) {
goto done;
}
sqlite3_result_double(context, value);
break;
break; break;
} }
case VEC0_METADATA_COLUMN_KIND_TEXT: { case VEC0_METADATA_COLUMN_KIND_TEXT: {
@ -5483,6 +5494,15 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
value = VEC0_METADATA_OPERATOR_NE; value = VEC0_METADATA_OPERATOR_NE;
break; break;
} }
default: {
// IMP: V16511_00582
rc = SQLITE_ERROR;
vtab_set_error(pVTab,
"An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. "
"Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed."
);
goto done;
}
} }
if(value) { if(value) {

View file

@ -0,0 +1,330 @@
# serializer version: 1
# name: test_constructor_limit[max 16 metadata columns]
dict({
'error': 'OperationalError',
'message': 'vec0 constructor error: More than 16 metadata columns were provided',
})
# ---
# name: test_knn.1
dict({
'error': 'OperationalError',
'message': 'An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed.',
})
# ---
# name: test_knn[sqlite_master]
OrderedDict({
'sql': "select * from sqlite_master where type = 'table' order by name",
'rows': list([
OrderedDict({
'type': 'table',
'name': 'sqlite_sequence',
'tbl_name': 'sqlite_sequence',
'rootpage': 3,
'sql': 'CREATE TABLE sqlite_sequence(name,seq)',
}),
OrderedDict({
'type': 'table',
'name': 'v',
'tbl_name': 'v',
'rootpage': 0,
'sql': 'CREATE VIRTUAL TABLE v using vec0(vector float[1], name text, chunk_size=8)',
}),
OrderedDict({
'type': 'table',
'name': 'v_chunks',
'tbl_name': 'v_chunks',
'rootpage': 2,
'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_metadata_chunks00',
'tbl_name': 'v_metadata_chunks00',
'rootpage': 7,
'sql': 'CREATE TABLE "v_metadata_chunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_rowids',
'tbl_name': 'v_rowids',
'rootpage': 4,
'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)',
}),
OrderedDict({
'type': 'table',
'name': 'v_vector_chunks00',
'tbl_name': 'v_vector_chunks00',
'rootpage': 5,
'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)',
}),
]),
})
# ---
# name: test_normal.1
dict({
'v_chunks': OrderedDict({
'sql': 'select * from v_chunks',
'rows': list([
]),
}),
'v_metadata_chunks00': OrderedDict({
'sql': 'select * from v_metadata_chunks00',
'rows': list([
]),
}),
'v_metadata_chunks01': OrderedDict({
'sql': 'select * from v_metadata_chunks01',
'rows': list([
]),
}),
'v_metadata_chunks02': OrderedDict({
'sql': 'select * from v_metadata_chunks02',
'rows': list([
]),
}),
'v_metadata_chunks03': OrderedDict({
'sql': 'select * from v_metadata_chunks03',
'rows': list([
]),
}),
'v_metadata_chunks04': OrderedDict({
'sql': 'select * from v_metadata_chunks04',
'rows': list([
]),
}),
'v_rowids': OrderedDict({
'sql': 'select * from v_rowids',
'rows': list([
]),
}),
'v_vector_chunks00': OrderedDict({
'sql': 'select * from v_vector_chunks00',
'rows': list([
]),
}),
})
# ---
# name: test_normal.2
OrderedDict({
'sql': 'insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)',
'rows': list([
]),
})
# ---
# name: test_normal.3
OrderedDict({
'sql': 'insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)',
'rows': list([
]),
})
# ---
# name: test_normal.4
OrderedDict({
'sql': 'insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)',
'rows': list([
]),
})
# ---
# name: test_normal.5
OrderedDict({
'sql': 'select * from v',
'rows': list([
OrderedDict({
'rowid': 1,
'vector': b'\x11\x11\x11\x11',
'n1': 1,
'n2': 1,
'f': 1.100000023841858,
'd': 1.1,
't': 'one',
}),
OrderedDict({
'rowid': 2,
'vector': b'""""',
'n1': 2,
'n2': 2,
'f': 2.200000047683716,
'd': 2.2,
't': 'two',
}),
OrderedDict({
'rowid': 3,
'vector': b'3333',
'n1': 3,
'n2': 3,
'f': 3.299999952316284,
'd': 3.3,
't': 'three',
}),
]),
})
# ---
# name: test_normal.6
dict({
'v_chunks': OrderedDict({
'sql': 'select * from v_chunks',
'rows': list([
OrderedDict({
'chunk_id': 1,
'size': 8,
'validity': b'\x07',
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_metadata_chunks00': OrderedDict({
'sql': 'select * from v_metadata_chunks00',
'rows': list([
OrderedDict({
'rowid': 1,
'data': b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_metadata_chunks01': OrderedDict({
'sql': 'select * from v_metadata_chunks01',
'rows': list([
OrderedDict({
'rowid': 1,
'data': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_metadata_chunks02': OrderedDict({
'sql': 'select * from v_metadata_chunks02',
'rows': list([
OrderedDict({
'rowid': 1,
'data': b'\xcd\xcc\x8c?\xcd\xcc\x0c@33S@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_metadata_chunks03': OrderedDict({
'sql': 'select * from v_metadata_chunks03',
'rows': list([
OrderedDict({
'rowid': 1,
'data': b'\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_metadata_chunks04': OrderedDict({
'sql': 'select * from v_metadata_chunks04',
'rows': list([
OrderedDict({
'rowid': 1,
'data': b'\x03\x00\x00\x00one?\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00two@\x01\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00three\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_rowids': OrderedDict({
'sql': 'select * from v_rowids',
'rows': list([
OrderedDict({
'rowid': 1,
'id': None,
'chunk_id': 1,
'chunk_offset': 0,
}),
OrderedDict({
'rowid': 2,
'id': None,
'chunk_id': 1,
'chunk_offset': 1,
}),
OrderedDict({
'rowid': 3,
'id': None,
'chunk_id': 1,
'chunk_offset': 2,
}),
]),
}),
'v_vector_chunks00': OrderedDict({
'sql': 'select * from v_vector_chunks00',
'rows': list([
OrderedDict({
'rowid': 1,
'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
})
# ---
# name: test_normal[sqlite_master]
OrderedDict({
'sql': "select * from sqlite_master where type = 'table' order by name",
'rows': list([
OrderedDict({
'type': 'table',
'name': 'sqlite_sequence',
'tbl_name': 'sqlite_sequence',
'rootpage': 3,
'sql': 'CREATE TABLE sqlite_sequence(name,seq)',
}),
OrderedDict({
'type': 'table',
'name': 'v',
'tbl_name': 'v',
'rootpage': 0,
'sql': 'CREATE VIRTUAL TABLE v using vec0(vector float[1], n1 int, n2 int64, f float, d double, t text, chunk_size=8)',
}),
OrderedDict({
'type': 'table',
'name': 'v_chunks',
'tbl_name': 'v_chunks',
'rootpage': 2,
'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_metadata_chunks00',
'tbl_name': 'v_metadata_chunks00',
'rootpage': 7,
'sql': 'CREATE TABLE "v_metadata_chunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_metadata_chunks01',
'tbl_name': 'v_metadata_chunks01',
'rootpage': 9,
'sql': 'CREATE TABLE "v_metadata_chunks01"(rowid PRIMARY KEY, data BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_metadata_chunks02',
'tbl_name': 'v_metadata_chunks02',
'rootpage': 11,
'sql': 'CREATE TABLE "v_metadata_chunks02"(rowid PRIMARY KEY, data BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_metadata_chunks03',
'tbl_name': 'v_metadata_chunks03',
'rootpage': 13,
'sql': 'CREATE TABLE "v_metadata_chunks03"(rowid PRIMARY KEY, data BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_metadata_chunks04',
'tbl_name': 'v_metadata_chunks04',
'rootpage': 15,
'sql': 'CREATE TABLE "v_metadata_chunks04"(rowid PRIMARY KEY, data BLOB NOT NULL)',
}),
OrderedDict({
'type': 'table',
'name': 'v_rowids',
'tbl_name': 'v_rowids',
'rootpage': 4,
'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)',
}),
OrderedDict({
'type': 'table',
'name': 'v_vector_chunks00',
'tbl_name': 'v_vector_chunks00',
'rootpage': 5,
'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)',
}),
]),
})
# ---

113
tests/test-metadata.py Normal file
View file

@ -0,0 +1,113 @@
import sqlite3
from collections import OrderedDict
def test_constructor_limit(db, snapshot):
    """Check the outcome of declaring 17 metadata columns on a vec0 table.

    The result of the ``create virtual table`` statement (as summarized by the
    module-level ``exec`` helper) is compared against the snapshot named
    "max 16 metadata columns".
    """
    # 17 definitions: one more than the limit the snapshot name refers to.
    metadata_columns = ",".join(f"metadata{x} integer" for x in range(17))
    # The comma after the generated list keeps the last metadata column and
    # the vector column as two separate, well-formed column definitions.
    assert exec(
        db,
        f"""
        create virtual table v using vec0(
          {metadata_columns},
          v float[1]
        )
        """,
    ) == snapshot(name="max 16 metadata columns")
def test_normal(db, snapshot):
    """Create a vec0 table with int, int64, float, double and text columns,
    insert three rows, and snapshot the schema, query results and shadow tables.
    """
    db.execute(
        "create virtual table v using vec0(vector float[1], n1 int, n2 int64, f float, d double, t text, chunk_size=8)"
    )

    schema = exec(db, "select * from sqlite_master where type = 'table' order by name")
    assert schema == snapshot(name="sqlite_master")

    # Shadow tables right after creation, before any row exists.
    empty_contents = vec0_shadow_table_contents(db, "v")
    assert empty_contents == snapshot()

    insert_sql = "insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)"
    sample_rows = [
        [b"\x11\x11\x11\x11", 1, 1, 1.1, 1.1, "one"],
        [b"\x22\x22\x22\x22", 2, 2, 2.2, 2.2, "two"],
        [b"\x33\x33\x33\x33", 3, 3, 3.3, 3.3, "three"],
    ]
    # One unnamed snapshot per insert, taken in insertion order.
    for params in sample_rows:
        assert exec(db, insert_sql, params) == snapshot()

    selected = exec(db, "select * from v")
    assert selected == snapshot()

    filled_contents = vec0_shadow_table_contents(db, "v")
    assert filled_contents == snapshot()

    # Disabled follow-up checks, kept for reference:
    #
    # assert exec(db, "select * from v") == snapshot()
    # assert vec0_shadow_table_contents(db, "v") == snapshot()
    #
    # db.execute("drop table v;")
    # assert exec(db, "select * from sqlite_master order by name") == snapshot(
    #     name="sqlite_master post drop"
    # )
def test_types(db, snapshot):
    """Placeholder: this test has no body yet and asserts nothing."""
def test_updates(db, snapshot):
    """Placeholder: this test has no body yet and asserts nothing."""
def test_deletes(db, snapshot):
    """Placeholder: this test has no body yet and asserts nothing."""
def test_knn(db, snapshot):
    """Snapshot the schema of a vec0 table with a text column, then snapshot
    the outcome of a KNN query that applies a ``like`` constraint to it.
    """
    db.execute(
        "create virtual table v using vec0(vector float[1], name text, chunk_size=8)"
    )

    schema = exec(db, "select * from sqlite_master where type = 'table' order by name")
    assert schema == snapshot(name="sqlite_master")

    named_vectors = [("[1]", "alex"), ("[2]", "brian"), ("[3]", "craig")]
    db.executemany("insert into v(vector, name) values (?, ?)", named_vectors)

    # EVIDENCE-OF: V16511_00582 catches "illegal" constraints on metadata columns
    illegal_knn_sql = "select *, distance from v where vector match '[5]' and k = 3 and name like 'illegal'"
    outcome = exec(db, illegal_knn_sql)
    assert outcome == snapshot()
def exec(db, sql, parameters=()):
    """Run ``sql`` on ``db`` and summarize the outcome as plain, comparable data.

    Note that this helper shadows the builtin ``exec`` inside this module.

    Args:
        db: Connection-like object whose ``execute(sql, parameters)`` returns a
            cursor with ``fetchall()``. Fetched rows must provide ``keys()``
            and lookup by column name (for example ``sqlite3.Row``).
        sql: SQL text to execute.
        parameters: Values bound to the statement's placeholders. Defaults to
            an empty, immutable tuple so no mutable object is shared between
            calls.

    Returns:
        On success, an ``OrderedDict`` with ``"sql"`` (the statement text) and
        ``"rows"`` (a list with one ``OrderedDict`` per fetched row, keyed by
        column name in column order).
        If ``sqlite3.DatabaseError`` (or a subclass such as
        ``sqlite3.OperationalError``) is raised while executing or fetching,
        a dict with ``"error"`` (the exception class name) and ``"message"``
        (``str`` of the exception).
    """
    try:
        rows = db.execute(sql, parameters).fetchall()
    except sqlite3.DatabaseError as e:
        # OperationalError derives from DatabaseError, so one clause covers
        # both; the concrete class name is still what gets reported.
        return {
            "error": e.__class__.__name__,
            "message": str(e),
        }

    result = OrderedDict()
    result["sql"] = sql
    result["rows"] = [OrderedDict((k, row[k]) for k in row.keys()) for row in rows]
    return result
def vec0_shadow_table_contents(db, v):
    """Return ``{name: exec(db, "select * from <name>")}`` for every
    ``sqlite_master`` entry whose name matches the LIKE pattern ``<v>_%``.

    Names are visited in the sorted order produced by the lookup query.
    """
    name_rows = db.execute(
        "select name from sqlite_master where name like ? order by 1", [f"{v}_%"]
    ).fetchall()
    # Names come straight from sqlite_master, so they are interpolated as-is.
    return {
        name_row[0]: exec(db, f"select * from {name_row[0]}")
        for name_row in name_rows
    }