mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Metadata filtering (#124)
* initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? * fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
This commit is contained in:
parent
9bfeaa7842
commit
352f953fc0
21 changed files with 7361 additions and 105 deletions
|
|
@ -316,7 +316,7 @@
|
|||
'type': 'table',
|
||||
'name': 'sqlite_sequence',
|
||||
'tbl_name': 'sqlite_sequence',
|
||||
'rootpage': 3,
|
||||
'rootpage': 5,
|
||||
'sql': 'CREATE TABLE sqlite_sequence(name,seq)',
|
||||
}),
|
||||
]),
|
||||
|
|
@ -326,18 +326,25 @@
|
|||
OrderedDict({
|
||||
'sql': 'select * from sqlite_master order by name',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'type': 'index',
|
||||
'name': 'sqlite_autoindex_v_info_1',
|
||||
'tbl_name': 'v_info',
|
||||
'rootpage': 3,
|
||||
'sql': None,
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'index',
|
||||
'name': 'sqlite_autoindex_v_vector_chunks00_1',
|
||||
'tbl_name': 'v_vector_chunks00',
|
||||
'rootpage': 6,
|
||||
'rootpage': 8,
|
||||
'sql': None,
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'sqlite_sequence',
|
||||
'tbl_name': 'sqlite_sequence',
|
||||
'rootpage': 3,
|
||||
'rootpage': 5,
|
||||
'sql': 'CREATE TABLE sqlite_sequence(name,seq)',
|
||||
}),
|
||||
OrderedDict({
|
||||
|
|
@ -351,28 +358,35 @@
|
|||
'type': 'table',
|
||||
'name': 'v_auxiliary',
|
||||
'tbl_name': 'v_auxiliary',
|
||||
'rootpage': 7,
|
||||
'rootpage': 9,
|
||||
'sql': 'CREATE TABLE "v_auxiliary"( rowid integer PRIMARY KEY , value00)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_chunks',
|
||||
'tbl_name': 'v_chunks',
|
||||
'rootpage': 2,
|
||||
'rootpage': 4,
|
||||
'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_info',
|
||||
'tbl_name': 'v_info',
|
||||
'rootpage': 2,
|
||||
'sql': 'CREATE TABLE "v_info" (key text primary key, value any)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_rowids',
|
||||
'tbl_name': 'v_rowids',
|
||||
'rootpage': 4,
|
||||
'rootpage': 6,
|
||||
'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_vector_chunks00',
|
||||
'tbl_name': 'v_vector_chunks00',
|
||||
'rootpage': 5,
|
||||
'rootpage': 7,
|
||||
'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)',
|
||||
}),
|
||||
]),
|
||||
|
|
@ -409,25 +423,25 @@
|
|||
# ---
|
||||
# name: test_types.3
|
||||
dict({
|
||||
'error': 'OperationalError',
|
||||
'error': 'IntegrityError',
|
||||
'message': 'Auxiliary column type mismatch: The auxiliary column aux_int has type INTEGER, but TEXT was provided.',
|
||||
})
|
||||
# ---
|
||||
# name: test_types.4
|
||||
dict({
|
||||
'error': 'OperationalError',
|
||||
'error': 'IntegrityError',
|
||||
'message': 'Auxiliary column type mismatch: The auxiliary column aux_float has type FLOAT, but TEXT was provided.',
|
||||
})
|
||||
# ---
|
||||
# name: test_types.5
|
||||
dict({
|
||||
'error': 'OperationalError',
|
||||
'error': 'IntegrityError',
|
||||
'message': 'Auxiliary column type mismatch: The auxiliary column aux_text has type TEXT, but INTEGER was provided.',
|
||||
})
|
||||
# ---
|
||||
# name: test_types.6
|
||||
dict({
|
||||
'error': 'OperationalError',
|
||||
'error': 'IntegrityError',
|
||||
'message': 'Auxiliary column type mismatch: The auxiliary column aux_blob has type BLOB, but INTEGER was provided.',
|
||||
})
|
||||
# ---
|
||||
|
|
|
|||
184
tests/__snapshots__/test-general.ambr
Normal file
184
tests/__snapshots__/test-general.ambr
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
# serializer version: 1
|
||||
# name: test_info
|
||||
OrderedDict({
|
||||
'sql': 'select key, typeof(value) from v_info order by 1',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'key': 'CREATE_VERSION',
|
||||
'typeof(value)': 'text',
|
||||
}),
|
||||
OrderedDict({
|
||||
'key': 'CREATE_VERSION_MAJOR',
|
||||
'typeof(value)': 'integer',
|
||||
}),
|
||||
OrderedDict({
|
||||
'key': 'CREATE_VERSION_MINOR',
|
||||
'typeof(value)': 'integer',
|
||||
}),
|
||||
OrderedDict({
|
||||
'key': 'CREATE_VERSION_PATCH',
|
||||
'typeof(value)': 'integer',
|
||||
}),
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
# name: test_shadow
|
||||
OrderedDict({
|
||||
'sql': 'select * from sqlite_master order by name',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'type': 'index',
|
||||
'name': 'sqlite_autoindex_v_info_1',
|
||||
'tbl_name': 'v_info',
|
||||
'rootpage': 3,
|
||||
'sql': None,
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'index',
|
||||
'name': 'sqlite_autoindex_v_metadatachunks00_1',
|
||||
'tbl_name': 'v_metadatachunks00',
|
||||
'rootpage': 10,
|
||||
'sql': None,
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'index',
|
||||
'name': 'sqlite_autoindex_v_metadatatext00_1',
|
||||
'tbl_name': 'v_metadatatext00',
|
||||
'rootpage': 12,
|
||||
'sql': None,
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'index',
|
||||
'name': 'sqlite_autoindex_v_vector_chunks00_1',
|
||||
'tbl_name': 'v_vector_chunks00',
|
||||
'rootpage': 8,
|
||||
'sql': None,
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'sqlite_sequence',
|
||||
'tbl_name': 'sqlite_sequence',
|
||||
'rootpage': 5,
|
||||
'sql': 'CREATE TABLE sqlite_sequence(name,seq)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v',
|
||||
'tbl_name': 'v',
|
||||
'rootpage': 0,
|
||||
'sql': 'CREATE VIRTUAL TABLE v using vec0(a float[1], partition text partition key, metadata text, +name text, chunk_size=8)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_auxiliary',
|
||||
'tbl_name': 'v_auxiliary',
|
||||
'rootpage': 13,
|
||||
'sql': 'CREATE TABLE "v_auxiliary"( rowid integer PRIMARY KEY , value00)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_chunks',
|
||||
'tbl_name': 'v_chunks',
|
||||
'rootpage': 4,
|
||||
'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,sequence_id integer,partition00,validity BLOB NOT NULL, rowids BLOB NOT NULL)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_info',
|
||||
'tbl_name': 'v_info',
|
||||
'rootpage': 2,
|
||||
'sql': 'CREATE TABLE "v_info" (key text primary key, value any)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_metadatachunks00',
|
||||
'tbl_name': 'v_metadatachunks00',
|
||||
'rootpage': 9,
|
||||
'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_metadatatext00',
|
||||
'tbl_name': 'v_metadatatext00',
|
||||
'rootpage': 11,
|
||||
'sql': 'CREATE TABLE "v_metadatatext00"(rowid PRIMARY KEY, data TEXT)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_rowids',
|
||||
'tbl_name': 'v_rowids',
|
||||
'rootpage': 6,
|
||||
'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_vector_chunks00',
|
||||
'tbl_name': 'v_vector_chunks00',
|
||||
'rootpage': 7,
|
||||
'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)',
|
||||
}),
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
# name: test_shadow.1
|
||||
OrderedDict({
|
||||
'sql': "select * from pragma_table_list where type = 'shadow'",
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'schema': 'main',
|
||||
'name': 'v_auxiliary',
|
||||
'type': 'shadow',
|
||||
'ncol': 2,
|
||||
'wr': 0,
|
||||
'strict': 0,
|
||||
}),
|
||||
OrderedDict({
|
||||
'schema': 'main',
|
||||
'name': 'v_chunks',
|
||||
'type': 'shadow',
|
||||
'ncol': 6,
|
||||
'wr': 0,
|
||||
'strict': 0,
|
||||
}),
|
||||
OrderedDict({
|
||||
'schema': 'main',
|
||||
'name': 'v_info',
|
||||
'type': 'shadow',
|
||||
'ncol': 2,
|
||||
'wr': 0,
|
||||
'strict': 0,
|
||||
}),
|
||||
OrderedDict({
|
||||
'schema': 'main',
|
||||
'name': 'v_rowids',
|
||||
'type': 'shadow',
|
||||
'ncol': 4,
|
||||
'wr': 0,
|
||||
'strict': 0,
|
||||
}),
|
||||
OrderedDict({
|
||||
'schema': 'main',
|
||||
'name': 'v_metadatachunks00',
|
||||
'type': 'shadow',
|
||||
'ncol': 2,
|
||||
'wr': 0,
|
||||
'strict': 0,
|
||||
}),
|
||||
OrderedDict({
|
||||
'schema': 'main',
|
||||
'name': 'v_metadatatext00',
|
||||
'type': 'shadow',
|
||||
'ncol': 2,
|
||||
'wr': 0,
|
||||
'strict': 0,
|
||||
}),
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
# name: test_shadow.2
|
||||
OrderedDict({
|
||||
'sql': "select * from pragma_table_list where type = 'shadow'",
|
||||
'rows': list([
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
4097
tests/__snapshots__/test-metadata.ambr
Normal file
4097
tests/__snapshots__/test-metadata.ambr
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue