From 352f953fc0a8ac45953c451c36ee28f719c2d079 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Wed, 20 Nov 2024 00:59:34 -0800 Subject: [PATCH] Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests --- .github/workflows/test.yaml | 1 + .gitignore | 5 + ARCHITECTURE.md | 82 +- Makefile | 3 + TODO | 28 +- sqlite-vec.c | 1741 +++++++++- sqlite-vec.h.tmpl | 7 + test.sql | 327 +- tests/__snapshots__/test-auxiliary.ambr | 36 +- tests/__snapshots__/test-general.ambr | 184 + tests/__snapshots__/test-metadata.ambr | 4097 +++++++++++++++++++++++ tests/afbd/.gitignore | 1 + tests/afbd/.python-version | 1 + tests/afbd/Makefile | 9 + tests/afbd/README.md | 12 + tests/afbd/test-afbd.py | 231 ++ tests/test-auxiliary.py | 6 + tests/test-general.py | 60 + tests/test-loadable.py | 4 + tests/test-metadata.py | 629 ++++ tests/test-partition-keys.py | 2 + 21 files changed, 7361 insertions(+), 105 deletions(-) create mode 100644 tests/__snapshots__/test-general.ambr create mode 100644 tests/__snapshots__/test-metadata.ambr create mode 100644 tests/afbd/.gitignore create mode 100644 tests/afbd/.python-version create mode 100644 tests/afbd/Makefile create mode 100644 tests/afbd/README.md create mode 100644 tests/afbd/test-afbd.py create mode 100644 tests/test-general.py create mode 100644 tests/test-metadata.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a39b78a..3c2b4c9 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -5,6 +5,7 @@ on: - main - partition-by - auxiliary + - metadata-filtering permissions: contents: read jobs: diff --git a/.gitignore b/.gitignore index ef7a661..0268d5d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,8 @@ sqlite-vec.h tmp/ poetry.lock + +*.jsonl + +memstat.c +memstat.* diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 
8ac9501..4f62e16 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -1,5 +1,51 @@ +# `sqlite-vec` Architecture + +Internal documentation for how `sqlite-vec` works under-the-hood. Not meant for +users of the `sqlite-vec` project, consult +[the official `sqlite-vec` documentation](https://alexgarcia.xyz/sqlite-vec) for +how-to-guides. Rather, this is for people interested in how `sqlite-vec` works +and some guidelines to any future contributors. + +Very much a WIP. + ## `vec0` +### Shadow Tables + +#### `xyz_chunks` + +- `chunk_id INTEGER` +- `size INTEGER` +- `validity BLOB` +- `rowids BLOB` + +#### `xyz_rowids` + +- `rowid INTEGER` +- `id` +- `chunk_id INTEGER` +- `chunk_offset INTEGER` + +#### `xyz_vector_chunksNN` + +- `rowid INTEGER` +- `vector BLOB` + +#### `xyz_auxiliary` + +- `rowid INTEGER` +- `valueNN [type]` + +#### `xyz_metadatachunksNN` + +- `rowid INTEGER` +- `data BLOB` + +#### `xyz_metadatatextNN` + +- `rowid INTEGER` +- `data TEXT` + ### idxStr The `vec0` idxStr is a string composed of single "header" character and 0 or @@ -14,8 +60,11 @@ The "header" charcter denotes the type of query plan, as determined by the | `VEC0_QUERY_PLAN_POINT` | `'2'` | Perform a single-lookup point query for the provided rowid | | `VEC0_QUERY_PLAN_KNN` | `'3'` | Perform a KNN-style query on the provided query vector and parameters. | -Each 4-character "block" is associated with a corresponding value in `argv[]`. For example, the 1st block at byte offset `1-4` (inclusive) is the 1st block and is associated with `argv[1]`. The 2nd block at byte offset `5-8` (inclusive) is associated with `argv[2]` and so on. Each block describes what kind of value or filter the given `argv[i]` value is. - +Each 4-character "block" is associated with a corresponding value in `argv[]`. +For example, the 1st block at byte offset `1-4` (inclusive) is the 1st block and +is associated with `argv[1]`. The 2nd block at byte offset `5-8` (inclusive) is +associated with `argv[2]` and so on. 
Each block describes what kind of value or +filter the given `argv[i]` value is. #### `VEC0_IDXSTR_KIND_KNN_MATCH` (`'{'`) @@ -31,8 +80,8 @@ The remaining 3 characters of the block are `_` fillers. #### `VEC0_IDXSTR_KIND_KNN_ROWID_IN` (`'['`) -`argv[i]` is the optional `rowid in (...)` value, and must be handled with [`sqlite3_vtab_in_first()` / -`sqlite3_vtab_in_next()`](https://www.sqlite.org/c3ref/vtab_in_first.html). +`argv[i]` is the optional `rowid in (...)` value, and must be handled with +[`sqlite3_vtab_in_first()` / `sqlite3_vtab_in_next()`](https://www.sqlite.org/c3ref/vtab_in_first.html). The remaining 3 characters of the block are `_` fillers. @@ -40,15 +89,34 @@ The remaining 3 characters of the block are `_` fillers. `argv[i]` is a "constraint" on a specific partition key. -The second character of the block denotes which partition key to filter on, using `A` to denote the first partition key column, `B` for the second, etc. It is encoded with `'A' + partition_idx` and can be decoded with `c - 'A'`. +The second character of the block denotes which partition key to filter on, +using `A` to denote the first partition key column, `B` for the second, etc. It +is encoded with `'A' + partition_idx` and can be decoded with `c - 'A'`. -The third character of the block denotes which operator is used in the constraint. It will be one of the values of `enum vec0_partition_operator`, as only a subset of operations are supported on partition keys. +The third character of the block denotes which operator is used in the +constraint. It will be one of the values of `enum vec0_partition_operator`, as +only a subset of operations are supported on partition keys. The fourth character of the block is a `_` filler. - #### `VEC0_IDXSTR_KIND_POINT_ID` (`'!'`) `argv[i]` is the value of the rowid or id to match against for the point query. The remaining 3 characters of the block are `_` fillers. 
+ +#### `VEC0_IDXSTR_KIND_METADATA_CONSTRAINT` (`'&'`) + +`argv[i]` is the value of the `WHERE` constraint for a metadata column in a KNN +query. + +The second character of the block denotes which metadata column the constraint +belongs to, using `A` to denote the first metadata column, `B` for the +second, etc. It is encoded with `'A' + metadata_idx` and can be decoded with +`c - 'A'`. + +The third character of the block is the constraint operator. It will be one of +`enum vec0_metadata_operator`, as only a subset of operators are supported on +metadata column KNN filters. + +The fourth character of the block is a `_` filler. diff --git a/Makefile b/Makefile index 1496b7a..43e1d98 100644 --- a/Makefile +++ b/Makefile @@ -153,6 +153,9 @@ sqlite-vec.h: sqlite-vec.h.tmpl VERSION VERSION=$(shell cat VERSION) \ DATE=$(shell date -r VERSION +'%FT%TZ%z') \ SOURCE=$(shell git log -n 1 --pretty=format:%H -- VERSION) \ + VERSION_MAJOR=$$(echo $$VERSION | cut -d. -f1) \ + VERSION_MINOR=$$(echo $$VERSION | cut -d. -f2) \ + VERSION_PATCH=$$(echo $$VERSION | cut -d. -f3 | cut -d- -f1) \ envsubst < $< > $@ clean: diff --git a/TODO b/TODO index 3914aba..b3962b7 100644 --- a/TODO +++ b/TODO @@ -1,13 +1,17 @@ -# partition +- [ ] add `xyz_info` shadow table with version etc. -- [ ] UPDATE on partition key values - - remove previous row from chunk, insert into new one? -- [ ] properly sqlite3_vtab_nochange / sqlite3_value_nochange handling - -# auxiliary columns - -- later: - - NOT NULL? - - perf: INSERT stmt should be cached on vec0_vtab - - perf: LEFT JOIN aux table to rowids query in vec0_cursor for rowid/point - stmts, to avoid N lookup queries +- later + - [ ] partition: UPDATE support + - [ ] skip invalid validity entries in knn filter? 
+ - [ ] nulls in metadata + - [ ] partition `x in (...)` handling + - [ ] blobs/date/datetime + - [ ] uuid/ulid perf + - [ ] Aux columns: `NOT NULL` constraint + - [ ] Metadata columns: `NOT NULL` constraint + - [ ] Partition key: `NOT NULL` constraint + - [ ] dictionary encoding? + - [ ] properly sqlite3_vtab_nochange / sqlite3_value_nochange handling + - [ ] perf + - [ ] aux: cache INSERT + - [ ] aux: LEFT JOIN on `_rowids` queries to avoid N lookup queries diff --git a/sqlite-vec.c b/sqlite-vec.c index 062381a..d8490cd 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -2043,7 +2043,7 @@ int vec0_parse_auxiliary_column_definition(const char *source, int source_length // Check first token is '+', which denotes aux columns int rc = vec0_scanner_next(&scanner, &token); - if (rc != VEC0_TOKEN_RESULT_SOME && + if (rc != VEC0_TOKEN_RESULT_SOME || token.token_type != TOKEN_TYPE_PLUS) { return SQLITE_EMPTY; } @@ -2088,6 +2088,74 @@ int vec0_parse_auxiliary_column_definition(const char *source, int source_length return SQLITE_OK; } +typedef enum { + VEC0_METADATA_COLUMN_KIND_BOOLEAN, + VEC0_METADATA_COLUMN_KIND_INTEGER, + VEC0_METADATA_COLUMN_KIND_FLOAT, + VEC0_METADATA_COLUMN_KIND_TEXT, + // future: blob, date, datetime +} vec0_metadata_column_kind; + +/** + * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if + * it's a metadata column definition, ie `[name] [type]` like `is_released boolean` + * + * @param source: argv[i] source string + * @param source_length: length of the source string + * @param out_column_name: If it is a metadata column, the output column name. Same lifetime + * as source, points to specific char * + * @param out_column_name_length: Length of out_column_name in bytes + * @param out_column_type: one of vec0_metadata_column_kind + * @return int: SQLITE_EMPTY if not a metadata column, SQLITE_OK if it is. 
+ */ +int vec0_parse_metadata_column_definition(const char *source, int source_length, + char **out_column_name, + int *out_column_name_length, + vec0_metadata_column_kind *out_column_type) { + struct Vec0Scanner scanner; + struct Vec0Token token; + char *column_name; + int column_name_length; + vec0_metadata_column_kind column_type; + int rc; + vec0_scanner_init(&scanner, source, source_length); + + rc = vec0_scanner_next(&scanner, &token); + if (rc != VEC0_TOKEN_RESULT_SOME || + token.token_type != TOKEN_TYPE_IDENTIFIER) { + return SQLITE_EMPTY; + } + + column_name = token.start; + column_name_length = token.end - token.start; + + // Check the next token matches a valid metadata type exactly (case-insensitive); the length checks stop sqlite3_strnicmp() from accepting bare prefixes such as `t` for `text` + rc = vec0_scanner_next(&scanner, &token); + if (rc != VEC0_TOKEN_RESULT_SOME || + token.token_type != TOKEN_TYPE_IDENTIFIER) { + return SQLITE_EMPTY; + } + char * t = token.start; + int n = token.end - token.start; + if ((n == 7 && sqlite3_strnicmp(t, "boolean", n) == 0) || (n == 4 && sqlite3_strnicmp(t, "bool", n) == 0)) { + column_type = VEC0_METADATA_COLUMN_KIND_BOOLEAN; + }else if ((n == 5 && sqlite3_strnicmp(t, "int64", n) == 0) || (n == 9 && sqlite3_strnicmp(t, "integer64", n) == 0) || (n == 7 && sqlite3_strnicmp(t, "integer", n) == 0) || (n == 3 && sqlite3_strnicmp(t, "int", n) == 0)) { + column_type = VEC0_METADATA_COLUMN_KIND_INTEGER; + }else if ((n == 5 && sqlite3_strnicmp(t, "float", n) == 0) || (n == 6 && sqlite3_strnicmp(t, "double", n) == 0) || (n == 7 && sqlite3_strnicmp(t, "float64", n) == 0) || (n == 3 && sqlite3_strnicmp(t, "f64", n) == 0)) { + column_type = VEC0_METADATA_COLUMN_KIND_FLOAT; + } else if (n == 4 && sqlite3_strnicmp(t, "text", n) == 0) { + column_type = VEC0_METADATA_COLUMN_KIND_TEXT; + } else { + return SQLITE_EMPTY; + } + + *out_column_name = column_name; + *out_column_name_length = column_name_length; + *out_column_type = column_type; + + return SQLITE_OK; +} + /** * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if * it's a PRIMARY KEY definition. 
@@ -2190,6 +2258,11 @@ struct Vec0AuxiliaryColumnDefinition { char * name; int name_length; }; +struct Vec0MetadataColumnDefinition { + vec0_metadata_column_kind kind; + char * name; + int name_length; +}; size_t vector_byte_size(enum VectorElementType element_type, size_t dimensions) { @@ -2268,7 +2341,7 @@ int vec0_parse_vector_column(const char *source, int source_length, // left '[' bracket rc = vec0_scanner_next(&scanner, &token); if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_LBRACKET) { - return SQLITE_ERROR; + return SQLITE_EMPTY; } // digit, for vector dimension length @@ -3302,6 +3375,8 @@ static sqlite3_module vec_npy_eachModule = { #define VEC0_COLUMN_OFFSET_DISTANCE 1 #define VEC0_COLUMN_OFFSET_K 2 +#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\"" + #define VEC0_SHADOW_CHUNKS_NAME "\"%w\".\"%w_chunks\"" /// 1) schema, 2) original vtab table name #define VEC0_SHADOW_CHUNKS_CREATE \ @@ -3345,6 +3420,9 @@ static sqlite3_module vec_npy_eachModule = { #define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\"" +#define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadatachunks%02d\"" +#define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadatatext%02d\"" + #define VEC_INTERAL_ERROR "Internal sqlite-vec error: " #define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new" @@ -3353,8 +3431,11 @@ typedef struct vec0_vtab vec0_vtab; #define VEC0_MAX_VECTOR_COLUMNS 16 #define VEC0_MAX_PARTITION_COLUMNS 4 #define VEC0_MAX_AUXILIARY_COLUMNS 16 +#define VEC0_MAX_METADATA_COLUMNS 16 #define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192 +#define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16 +#define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12 typedef enum { // vector column, ie "contents_embedding float[1024]" @@ -3366,7 +3447,8 @@ typedef enum { // SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3, - // TODO: metadata filters + // metadata column that can be filtered, ie "genre text" + SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4, } vec0_user_column_kind; struct 
vec0_vtab { @@ -3388,6 +3470,9 @@ struct vec0_vtab { // number of defined auxiliary columns int numAuxiliaryColumns; + // number of defined metadata columns + int numMetadataColumns; + // Name of the schema the table exists on. // Must be freed with sqlite3_free() @@ -3407,9 +3492,10 @@ struct vec0_vtab { // contains enum vec0_user_column_kind values for up to // numVectorColumns + numPartitionColumns entries - vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS]; + vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS]; + + uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS]; - uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS]; // Name of all the vector chunk shadow tables. // Ex '_vector_chunks00' @@ -3417,9 +3503,15 @@ struct vec0_vtab { // The first numVectorColumns entries must be freed with sqlite3_free() char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS]; + // Name of all metadata chunk shadow tables, ie `_metadatachunks00` + // Only the first numMetadataColumns entries will be available. 
+ // The first numMetadataColumns entries must be freed with sqlite3_free() + char *shadowMetadataChunksNames[VEC0_MAX_METADATA_COLUMNS]; + struct VectorColumnDefinition vector_columns[VEC0_MAX_VECTOR_COLUMNS]; struct Vec0PartitionColumnDefinition paritition_columns[VEC0_MAX_PARTITION_COLUMNS]; struct Vec0AuxiliaryColumnDefinition auxiliary_columns[VEC0_MAX_AUXILIARY_COLUMNS]; + struct Vec0MetadataColumnDefinition metadata_columns[VEC0_MAX_METADATA_COLUMNS]; int chunk_size; @@ -3525,7 +3617,7 @@ void vec0_free(vec0_vtab *p) { } int vec0_num_defined_user_columns(vec0_vtab *p) { - return p->numVectorColumns + p->numPartitionColumns + p->numAuxiliaryColumns; + return p->numVectorColumns + p->numPartitionColumns + p->numAuxiliaryColumns + p->numMetadataColumns; } /** @@ -3607,6 +3699,25 @@ int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) { return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START]; } +/** + * Returns 1 if the given column-based index is a metadata column, + * 0 otherwise. + */ +int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) { + return column_idx >= VEC0_COLUMN_USERN_START && + column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) && + pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA; +} + +/** + * Returns the metadata column index of the given user column index. 
+ * ONLY call if validated with vec0_column_idx_is_metadata before + */ +int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) { + UNUSED_PARAMETER(pVtab); + return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START]; +} + /** * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value * of a vec0_vtab row with the provided rowid @@ -3922,6 +4033,102 @@ int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliar return rc; } +/** + * @brief Result the given metadata value for the given row and metadata column index. + * Will traverse the metadatachunksNN table with BLOB I/0 for the given rowid. + * + * @param p + * @param rowid + * @param metadata_idx + * @param context + * @return int + */ +int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context * context) { + int rc; + i64 chunk_id; + i64 chunk_offset; + rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset); + if(rc != SQLITE_OK) { + return rc; + } + sqlite3_blob * blobValue; + rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &blobValue); + if(rc != SQLITE_OK) { + return rc; + } + + switch(p->metadata_columns[metadata_idx].kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + u8 block; + rc = sqlite3_blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT); + if(rc != SQLITE_OK) { + goto done; + } + int value = block >> ((chunk_offset % CHAR_BIT)) & 1; + sqlite3_result_int(context, value); + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + i64 value; + rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64)); + if(rc != SQLITE_OK) { + goto done; + } + sqlite3_result_int64(context, value); + break; + } + case VEC0_METADATA_COLUMN_KIND_FLOAT: { + double value; + rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double)); + if(rc != SQLITE_OK) { + goto done; + } + 
sqlite3_result_double(context, value); + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + rc = sqlite3_blob_read(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + if(rc != SQLITE_OK) { + goto done; + } + int length = ((int *)view)[0]; + if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + sqlite3_result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT); + } + else { + sqlite3_stmt * stmt; + const char * zSql = sqlite3_mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx); + if(!zSql) { + rc = SQLITE_NOMEM; // sqlite3_mprintf() returns NULL only when allocation fails + goto done; + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); + sqlite3_free((void *) zSql); + if(rc != SQLITE_OK) { + goto done; + } + sqlite3_bind_int64(stmt, 1, rowid); + rc = sqlite3_step(stmt); + if(rc != SQLITE_ROW) { + sqlite3_finalize(stmt); + rc = SQLITE_ERROR; + goto done; + } + sqlite3_result_value(context, sqlite3_column_value(stmt, 0)); + sqlite3_finalize(stmt); + rc = SQLITE_OK; + } + break; + } + } + done: + // blobValue is read-only, will not fail on close + sqlite3_blob_close(blobValue); + return rc; + +} + int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value ** partitionKeyValues) { int rc; const char *zSql; @@ -4122,6 +4329,20 @@ complete: return rc; } +int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) { + switch(kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: + return chunk_size / 8; + case VEC0_METADATA_COLUMN_KIND_INTEGER: + return chunk_size * sizeof(i64); + case VEC0_METADATA_COLUMN_KIND_FLOAT: + return chunk_size * sizeof(double); + case VEC0_METADATA_COLUMN_KIND_TEXT: + return chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH; + } + return 0; +} + int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid, i64 chunk_offset) { int rc = SQLITE_OK; @@ 
-4284,6 +4505,38 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk } } + // Step 3: Create new metadata chunks for each metadata column + for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { + if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { + continue; + } + int metadata_column_idx = p->user_column_idxs[i]; + zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME + "(rowid, data)" + "VALUES (?, ?)", + p->schemaName, p->tableName, metadata_column_idx); + if (!zSql) { + return SQLITE_NOMEM; + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); + sqlite3_free(zSql); + + if (rc != SQLITE_OK) { + sqlite3_finalize(stmt); + return rc; + } + + sqlite3_bind_int64(stmt, 1, rowid); + sqlite3_bind_zeroblob64(stmt, 2, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size)); + + rc = sqlite3_step(stmt); + sqlite3_finalize(stmt); + if (rc != SQLITE_DONE) { + return rc; + } + } + + if (chunk_rowid) { *chunk_rowid = rowid; } @@ -4399,6 +4652,7 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, int numVectorColumns = 0; int numPartitionColumns = 0; int numAuxiliaryColumns = 0; + int numMetadataColumns = 0; int user_column_idx = 0; // track if a "primary key" column is defined @@ -4410,6 +4664,7 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, struct VectorColumnDefinition vecColumn; struct Vec0PartitionColumnDefinition partitionColumn; struct Vec0AuxiliaryColumnDefinition auxColumn; + struct Vec0MetadataColumnDefinition metadataColumn; char *cName = NULL; int cNameLength; int cType; @@ -4519,6 +4774,33 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, continue; } + vec0_metadata_column_kind kind; + rc = vec0_parse_metadata_column_definition(argv[i], strlen(argv[i]), &cName, + &cNameLength, &kind); + if(rc == SQLITE_OK) { + if (numMetadataColumns >= 
VEC0_MAX_METADATA_COLUMNS) { + *pzErr = sqlite3_mprintf( + VEC_CONSTRUCTOR_ERROR + "More than %d metadata columns were provided", + VEC0_MAX_METADATA_COLUMNS); + goto error; + } + metadataColumn.kind = kind; + metadataColumn.name_length = cNameLength; + metadataColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName); + if(!metadataColumn.name) { + rc = SQLITE_NOMEM; + goto error; + } + + pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA; + pNew->user_column_idxs[user_column_idx] = numMetadataColumns; + memcpy(&pNew->metadata_columns[numMetadataColumns], &metadataColumn, sizeof(metadataColumn)); + numMetadataColumns++; + user_column_idx++; + continue; + } + // Scenario #4: Constructor argument is a table-level option, ie `chunk_size` char *key; @@ -4586,7 +4868,7 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, } else { sqlite3_str_appendall(createStr, "rowid, "); } - for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns; i++) { + for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns; i++) { switch(pNew->user_column_kinds[i]) { case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR: { int vector_idx = pNew->user_column_idxs[i]; @@ -4609,6 +4891,13 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, pNew->auxiliary_columns[auxiliary_idx].name); break; } + case SQLITE_VEC0_USER_COLUMN_KIND_METADATA: { + int metadata_idx = pNew->user_column_idxs[i]; + sqlite3_str_appendf(createStr, "\"%.*w\", ", + pNew->metadata_columns[metadata_idx].name_length, + pNew->metadata_columns[metadata_idx].name); + break; + } } } @@ -4653,6 +4942,7 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, pNew->numVectorColumns = numVectorColumns; pNew->numPartitionColumns = numPartitionColumns; pNew->numAuxiliaryColumns = numAuxiliaryColumns; + pNew->numMetadataColumns = numMetadataColumns; for (int i = 0; 
i < pNew->numVectorColumns; i++) { pNew->shadowVectorChunksNames[i] = @@ -4661,6 +4951,13 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, goto error; } } + for (int i = 0; i < pNew->numMetadataColumns; i++) { + pNew->shadowMetadataChunksNames[i] = + sqlite3_mprintf("%s_metadatachunks%02d", tableName, i); + if (!pNew->shadowMetadataChunksNames[i]) { + goto error; + } + } pNew->chunk_size = chunk_size; // if xCreate, then create the necessary shadow tables @@ -4668,6 +4965,59 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, sqlite3_stmt *stmt; int rc; + char * zCreateInfo = sqlite3_mprintf("CREATE TABLE "VEC0_SHADOW_INFO_NAME " (key text primary key, value any)", pNew->schemaName, pNew->tableName); + if(!zCreateInfo) { + goto error; + } + rc = sqlite3_prepare_v2(db, zCreateInfo, -1, &stmt, NULL); + + sqlite3_free((void *) zCreateInfo); + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + // TODO(IMP) + sqlite3_finalize(stmt); + *pzErr = sqlite3_mprintf("Could not create '_info' shadow table: %s", + sqlite3_errmsg(db)); + goto error; + } + sqlite3_finalize(stmt); + + char * zSeedInfo = sqlite3_mprintf( + "INSERT INTO "VEC0_SHADOW_INFO_NAME "(key, value) VALUES " + "(?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) ", + pNew->schemaName, pNew->tableName + ); + if(!zSeedInfo) { + goto error; + } + rc = sqlite3_prepare_v2(db, zSeedInfo, -1, &stmt, NULL); + sqlite3_free((void *) zSeedInfo); + if (rc != SQLITE_OK) { + // TODO(IMP) + sqlite3_finalize(stmt); + *pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s", + sqlite3_errmsg(db)); + goto error; + } + sqlite3_bind_text(stmt, 1, "CREATE_VERSION", -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 2, SQLITE_VEC_VERSION, -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 3, "CREATE_VERSION_MAJOR", -1, SQLITE_STATIC); + sqlite3_bind_int(stmt, 4, SQLITE_VEC_VERSION_MAJOR); + sqlite3_bind_text(stmt, 5, "CREATE_VERSION_MINOR", -1, SQLITE_STATIC); + 
sqlite3_bind_int(stmt, 6, SQLITE_VEC_VERSION_MINOR); + sqlite3_bind_text(stmt, 7, "CREATE_VERSION_PATCH", -1, SQLITE_STATIC); + sqlite3_bind_int(stmt, 8, SQLITE_VEC_VERSION_PATCH); + + if(sqlite3_step(stmt) != SQLITE_DONE) { + // TODO(IMP) + sqlite3_finalize(stmt); + *pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s", + sqlite3_errmsg(db)); + goto error; + } + sqlite3_finalize(stmt); + + + // create the _chunks shadow table char *zCreateShadowChunks = NULL; if(pNew->numPartitionColumns) { @@ -4741,6 +5091,43 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, sqlite3_finalize(stmt); } + for (int i = 0; i < pNew->numMetadataColumns; i++) { + char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME "(rowid PRIMARY KEY, data BLOB NOT NULL);", + pNew->schemaName, pNew->tableName, i); + if (!zSql) { + goto error; + } + rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + sqlite3_finalize(stmt); + *pzErr = sqlite3_mprintf( + "Could not create '_metadatachunks%02d' shadow table: %s", i, + sqlite3_errmsg(db)); + goto error; + } + sqlite3_finalize(stmt); + + if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { + char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME "(rowid PRIMARY KEY, data TEXT);", + pNew->schemaName, pNew->tableName, i); + if (!zSql) { + goto error; + } + rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + sqlite3_finalize(stmt); + *pzErr = sqlite3_mprintf( + "Could not create '_metadatatext%02d' shadow table: %s", i, + sqlite3_errmsg(db)); + goto error; + } + sqlite3_finalize(stmt); + + } + } + if(pNew->numAuxiliaryColumns > 0) { sqlite3_stmt * stmt; sqlite3_str * s = sqlite3_str_new(NULL); @@ -4800,7 +5187,7 @@ static int vec0Destroy(sqlite3_vtab *pVtab) { // 
Free up any sqlite3_stmt, otherwise DROPs on those tables will fail vec0_free_resources(p); - // later: can't evidence-of here, bc always gives "SQL logic error" instead of + // TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of // provided error zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_CHUNKS_NAME, p->schemaName, p->tableName); @@ -4813,6 +5200,17 @@ static int vec0Destroy(sqlite3_vtab *pVtab) { } sqlite3_finalize(stmt); + zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_INFO_NAME, p->schemaName, + p->tableName); + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + rc = SQLITE_ERROR; + vtab_set_error(pVtab, "could not drop info shadow table"); + goto done; + } + sqlite3_finalize(stmt); + zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_ROWIDS_NAME, p->schemaName, p->tableName); rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); @@ -4846,6 +5244,29 @@ static int vec0Destroy(sqlite3_vtab *pVtab) { sqlite3_finalize(stmt); } + + for (int i = 0; i < p->numMetadataColumns; i++) { + zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_N_NAME, p->schemaName,p->tableName, i); + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + rc = SQLITE_ERROR; + goto done; + } + sqlite3_finalize(stmt); + + if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { + zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME, p->schemaName,p->tableName, i); + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + rc = SQLITE_ERROR; + goto done; + } + sqlite3_finalize(stmt); + } + } + stmt = NULL; rc = SQLITE_OK; @@ -4887,6 +5308,7 @@ typedef enum { VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[', VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = 
']', VEC0_IDXSTR_KIND_POINT_ID = '!', + VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&', } vec0_idxstr_kind; // The different SQLITE_INDEX_CONSTRAINT values that vec0 partition key columns @@ -4901,6 +5323,15 @@ typedef enum { VEC0_PARTITION_OPERATOR_GE = 'e', VEC0_PARTITION_OPERATOR_NE = 'f', } vec0_partition_operator; +typedef enum { + VEC0_METADATA_OPERATOR_EQ = 'a', + VEC0_METADATA_OPERATOR_GT = 'b', + VEC0_METADATA_OPERATOR_LE = 'c', + VEC0_METADATA_OPERATOR_LT = 'd', + VEC0_METADATA_OPERATOR_GE = 'e', + VEC0_METADATA_OPERATOR_NE = 'f', + VEC0_METADATA_OPERATOR_IN = 'g', +} vec0_metadata_operator; static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) { vec0_vtab *p = (vec0_vtab *)pVTab; @@ -5115,6 +5546,102 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) { } + for (int i = 0; i < pIdxInfo->nConstraint; i++) { + if (!pIdxInfo->aConstraint[i].usable) + continue; + + int iColumn = pIdxInfo->aConstraint[i].iColumn; + int op = pIdxInfo->aConstraint[i].op; + if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) { + continue; + } + if(!vec0_column_idx_is_metadata(p, iColumn)) { + continue; + } + + int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn); + char value = 0; + + switch(op) { + case SQLITE_INDEX_CONSTRAINT_EQ: { + int vtabIn = 0; + #if COMPILER_SUPPORTS_VTAB_IN + if (sqlite3_libversion_number() >= 3038000) { + vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1); + } + if(vtabIn) { + switch(p->metadata_columns[metadata_idx].kind) { + case VEC0_METADATA_COLUMN_KIND_FLOAT: + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + // IMP: V15248_32086 + rc = SQLITE_ERROR; + vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns."); + goto done; + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: + case VEC0_METADATA_COLUMN_KIND_TEXT: { + break; + } + } + value = VEC0_METADATA_OPERATOR_IN; + sqlite3_vtab_in(pIdxInfo, i, 1); + }else + #endif + { + value = 
VEC0_PARTITION_OPERATOR_EQ; + } + break; + } + case SQLITE_INDEX_CONSTRAINT_GT: { + value = VEC0_METADATA_OPERATOR_GT; + break; + } + case SQLITE_INDEX_CONSTRAINT_LE: { + value = VEC0_METADATA_OPERATOR_LE; + break; + } + case SQLITE_INDEX_CONSTRAINT_LT: { + value = VEC0_METADATA_OPERATOR_LT; + break; + } + case SQLITE_INDEX_CONSTRAINT_GE: { + value = VEC0_METADATA_OPERATOR_GE; + break; + } + case SQLITE_INDEX_CONSTRAINT_NE: { + value = VEC0_METADATA_OPERATOR_NE; + break; + } + default: { + // IMP: V16511_00582 + rc = SQLITE_ERROR; + vtab_set_error(pVTab, + "An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. " + "Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed." + ); + goto done; + } + } + + if(p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) { + if(!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) { + // IMP: V10145_26984 + rc = SQLITE_ERROR; + vtab_set_error(pVTab, "ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns."); + goto done; + } + } + + pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++; + pIdxInfo->aConstraintUsage[i].omit = 1; + sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT); + sqlite3_str_appendchar(idxStr, 1, 'A' + metadata_idx); + sqlite3_str_appendchar(idxStr, 1, value); + sqlite3_str_appendchar(idxStr, 1, '_'); + + } + pIdxInfo->idxNum = iMatchVectorTerm; @@ -5284,6 +5811,41 @@ int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k, return SQLITE_OK; } +int vec0_get_metadata_text_long_value( + vec0_vtab * p, + sqlite3_stmt ** stmt, + int metadata_idx, + i64 rowid, + int *n, + char ** s) { + int rc; + if(!(*stmt)) { + const char * zSql = sqlite3_mprintf("select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " where rowid = ?", p->schemaName, p->tableName, metadata_idx); + if(!zSql) { + rc = SQLITE_NOMEM; + goto done; + } + 
rc = sqlite3_prepare_v2(p->db, zSql, -1, stmt, NULL); + sqlite3_free( (void *) zSql); + if(rc != SQLITE_OK) { + goto done; + } + } + + sqlite3_reset(*stmt); + sqlite3_bind_int64(*stmt, 1, rowid); + rc = sqlite3_step(*stmt); + if(rc != SQLITE_ROW) { + rc = SQLITE_ERROR; + goto done; + } + *s = (char *) sqlite3_column_text(*stmt, 0); + *n = sqlite3_column_bytes(*stmt, 0); + rc = SQLITE_OK; + done: + return rc; +} + /** * @brief Crete at "iterator" (sqlite3_stmt) of chunks with the given constraints * @@ -5303,6 +5865,7 @@ int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value int idxStrLength = strlen(idxStr); // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element" int numValueEntries = (idxStrLength-1) / 4; + assert(argc == numValueEntries); int rc; sqlite3_str * s = sqlite3_str_new(NULL); @@ -5381,9 +5944,513 @@ int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value return rc; } +// a single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER only for now. +struct Vec0MetadataIn{ + // index of argv[i]` the constraint is on + int argv_idx; + // metadata column index of the constraint, derived from idxStr + argv_idx + int metadata_idx; + // array of the copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next() + struct Array array; +}; + +// Array elements for `xxx in (...)` values for a text column. 
basically just a string +struct Vec0MetadataInTextEntry { + int n; + char * zString; +}; + + +int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) { + int rc; + sqlite3_stmt * stmt = NULL; + i64 * rowids = NULL; + sqlite3_blob * rowidsBlob; + const char * sTarget = (const char *) sqlite3_value_text(value); + int nTarget = sqlite3_value_bytes(value); + + + // TODO(perf): only text metadata news the rowids BLOB. Make it so that + // rowids BLOB is re-used when multiple fitlers on text columns, + // ex "name BETWEEN 'a' and 'b'"" + rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", chunk_rowid, 0, &rowidsBlob); + if(rc != SQLITE_OK) { + return rc; + } + assert(sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0); + assert((sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size); + + rowids = sqlite3_malloc(sqlite3_blob_bytes(rowidsBlob)); + if(!rowids) { + sqlite3_blob_close(rowidsBlob); + return SQLITE_NOMEM; + } + + rc = sqlite3_blob_read(rowidsBlob, rowids, sqlite3_blob_bytes(rowidsBlob), 0); + if(rc != SQLITE_OK) { + sqlite3_blob_close(rowidsBlob); + return rc; + } + sqlite3_blob_close(rowidsBlob); + + switch(op) { + int nPrefix; + char * sPrefix; + char *sFull; + int nFull; + u8 * view; + case VEC0_METADATA_OPERATOR_EQ: { + for(int i = 0; i < size; i++) { + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + + // for EQ the text lengths must match + if(nPrefix != nTarget) { + bitmap_set(b, i, 0); + continue; + } + int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)); + + // for short strings, use the prefix comparison direclty + if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + bitmap_set(b, i, cmpPrefix == 0); + continue; + } + // for EQ on longs strings, the prefix 
must match + if(cmpPrefix) { + bitmap_set(b, i, 0); + continue; + } + // consult the full string + rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0); + } + break; + } + case VEC0_METADATA_OPERATOR_NE: { + for(int i = 0; i < size; i++) { + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + + // for NE if text lengths dont match, it never will + if(nPrefix != nTarget) { + bitmap_set(b, i, 1); + continue; + } + + int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)); + + // for short strings, use the prefix comparison direclty + if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + bitmap_set(b, i, cmpPrefix != 0); + continue; + } + // for NE on longs strings, if prefixes dont match, then long string wont + if(cmpPrefix) { + bitmap_set(b, i, 1); + continue; + } + // consult the full string + rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0); + } + break; + } + case VEC0_METADATA_OPERATOR_GT: { + for(int i = 0; i < size; i++) { + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)); + + if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + // if prefix match, check which is longer + if(cmpPrefix == 0) { + bitmap_set(b, i, nPrefix > nTarget); + } + else { + bitmap_set(b, i, cmpPrefix > 0); + } + continue; + } + // TODO(perf): may not need to compare full text in some cases + + 
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0); + } + break; + } + case VEC0_METADATA_OPERATOR_GE: { + for(int i = 0; i < size; i++) { + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)); + + if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + // if prefix match, check which is longer + if(cmpPrefix == 0) { + bitmap_set(b, i, nPrefix >= nTarget); + } + else { + bitmap_set(b, i, cmpPrefix >= 0); + } + continue; + } + // TODO(perf): may not need to compare full text in some cases + + rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + bitmap_set(b, i, strncmp(sFull, sTarget, nFull) >= 0); + } + break; + } + case VEC0_METADATA_OPERATOR_LE: { + for(int i = 0; i < size; i++) { + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)); + + if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + // if prefix match, check which is longer + if(cmpPrefix == 0) { + bitmap_set(b, i, nPrefix <= nTarget); + } + else { + bitmap_set(b, i, cmpPrefix <= 0); + } + continue; + } + // TODO(perf): may not need to compare full text in some cases + + rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + bitmap_set(b, i, strncmp(sFull, sTarget, 
nFull) <= 0); + } + break; + } + case VEC0_METADATA_OPERATOR_LT: { + for(int i = 0; i < size; i++) { + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget)); + + if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + // if prefix match, check which is longer + if(cmpPrefix == 0) { + bitmap_set(b, i, nPrefix < nTarget); + } + else { + bitmap_set(b, i, cmpPrefix < 0); + } + continue; + } + // TODO(perf): may not need to compare full text in some cases + + rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + bitmap_set(b, i, strncmp(sFull, sTarget, nFull) < 0); + } + break; + } + + case VEC0_METADATA_OPERATOR_IN: { + size_t metadataInIdx = -1; + for(size_t i = 0; i < aMetadataIn->length; i++) { + struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]); + if(metadataIn->argv_idx == argv_idx) { + metadataInIdx = i; + break; + } + } + if(metadataInIdx < 0) { + rc = SQLITE_ERROR; + goto done; + } + + struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx]; + struct Array * aTarget = &(metadataIn->array); + + + int nPrefix; + char * sPrefix; + char *sFull; + int nFull; + u8 * view; + for(int i = 0; i < size; i++) { + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) { + struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]); + if(entry->n != nPrefix) { + continue; + } + int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)); + if(nPrefix <= 
VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + if(cmpPrefix == 0) { + bitmap_set(b, i, 1); + break; + } + continue; + } + if(cmpPrefix) { + continue; + } + + rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + if(strncmp(sFull, entry->zString, nFull) == 0) { + bitmap_set(b, i, 1); + break; + } + } + } + break; + } + + } + rc = SQLITE_OK; + + done: + sqlite3_finalize(stmt); + sqlite3_free(rowids); + return rc; + +} + +/** + * @brief Fill in bitmap of chunk values, whether or not the values match a metadata constraint + * + * @param p vec0_vtab + * @param metadata_idx index of the metatadata column to perfrom constraints on + * @param value sqlite3_value of the constraints value + * @param blob sqlite3_blob that is already opened on the metdata column's shadow chunk table + * @param chunk_rowid rowid of the chunk to calculate on + * @param b pre-allocated and zero'd out bitmap to write results to + * @param size size of the chunk + * @return int SQLITE_OK on success, error code otherwise + */ +int vec0_set_metadata_filter_bitmap( + vec0_vtab *p, + int metadata_idx, + vec0_metadata_operator op, + sqlite3_value * value, + sqlite3_blob * blob, + i64 chunk_rowid, + u8* b, + int size, + struct Array * aMetadataIn, int argv_idx) { + // TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap? 
+ + int rc; + rc = sqlite3_blob_reopen(blob, chunk_rowid); + if(rc != SQLITE_OK) { + return rc; + } + + vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind; + int szMatch = 0; + int blobSize = sqlite3_blob_bytes(blob); + switch(kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + szMatch = blobSize == size / CHAR_BIT; + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + szMatch = blobSize == size * sizeof(i64); + break; + } + case VEC0_METADATA_COLUMN_KIND_FLOAT: { + szMatch = blobSize == size * sizeof(double); + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + szMatch = blobSize == size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH; + break; + } + } + if(!szMatch) { + return SQLITE_ERROR; + } + void * buffer = sqlite3_malloc(blobSize); + if(!buffer) { + return SQLITE_NOMEM; + } + rc = sqlite3_blob_read(blob, buffer, blobSize, 0); + if(rc != SQLITE_OK) { + goto done; + } + switch(kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + int target = sqlite3_value_int(value); + if( (target && op == VEC0_METADATA_OPERATOR_EQ) || (!target && op == VEC0_METADATA_OPERATOR_NE)) { + for(int i = 0; i < size; i++) { bitmap_set(b, i, bitmap_get((u8*) buffer, i)); } + } + else { + for(int i = 0; i < size; i++) { bitmap_set(b, i, !bitmap_get((u8*) buffer, i)); } + } + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + i64 * array = (i64*) buffer; + i64 target = sqlite3_value_int64(value); + switch(op) { + case VEC0_METADATA_OPERATOR_EQ: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); } + break; + } + case VEC0_METADATA_OPERATOR_GT: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); } + break; + } + case VEC0_METADATA_OPERATOR_LE: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); } + break; + } + case VEC0_METADATA_OPERATOR_LT: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); } + break; + } + case VEC0_METADATA_OPERATOR_GE: { + for(int i = 0; i < size; 
i++) { bitmap_set(b, i, array[i] >= target); } + break; + } + case VEC0_METADATA_OPERATOR_NE: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); } + break; + } + case VEC0_METADATA_OPERATOR_IN: { + int metadataInIdx = -1; + for(size_t i = 0; i < aMetadataIn->length; i++) { + struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i]; + if(metadataIn->argv_idx == argv_idx) { + metadataInIdx = i; + break; + } + } + if(metadataInIdx < 0) { + rc = SQLITE_ERROR; + goto done; + } + struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx]; + struct Array * aTarget = &(metadataIn->array); + + for(int i = 0; i < size; i++) { + for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) { + if( ((i64*)aTarget->z)[target_idx] == array[i]) { + bitmap_set(b, i, 1); + break; + } + } + } + break; + } + } + break; + } + case VEC0_METADATA_COLUMN_KIND_FLOAT: { + double * array = (double*) buffer; + double target = sqlite3_value_double(value); + switch(op) { + case VEC0_METADATA_OPERATOR_EQ: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); } + break; + } + case VEC0_METADATA_OPERATOR_GT: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); } + break; + } + case VEC0_METADATA_OPERATOR_LE: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); } + break; + } + case VEC0_METADATA_OPERATOR_LT: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); } + break; + } + case VEC0_METADATA_OPERATOR_GE: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); } + break; + } + case VEC0_METADATA_OPERATOR_NE: { + for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); } + break; + } + case VEC0_METADATA_OPERATOR_IN: { + // should never be reached + break; + } + } + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, 
metadata_idx, chunk_rowid, aMetadataIn, argv_idx); + if(rc != SQLITE_OK) { + goto done; + } + break; + } + } + done: + sqlite3_free(buffer); + return rc; +} + int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks, struct VectorColumnDefinition *vector_column, int vectorColumnIdx, struct Array *arrayRowidsIn, + struct Array * aMetadataIn, + const char * idxStr, int argc, sqlite3_value ** argv, void *queryVector, i64 k, i64 **out_topk_rowids, f32 **out_topk_distances, i64 *out_used) { // for each chunk, get top min(k, chunk_size) rowid + distances to query vec. @@ -5407,6 +6474,7 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks, u8 *bTaken = NULL; // memory: chunk_size / 8 i32 *chunk_topk_idxs = NULL; // memory: k * 4 u8 *bmRowids = NULL; // memory: chunk_size / 8 + u8 *bmMetadata = NULL; // memory: chunk_size / 8 // // total: a lot??? // 6 * (k * 4) + (k * 2) + (chunk_size / 8) + (chunk_size * dimensions * 4) @@ -5477,6 +6545,28 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks, goto cleanup; } + sqlite3_blob * metadataBlobs[VEC0_MAX_METADATA_COLUMNS]; + memset(metadataBlobs, 0, sizeof(sqlite3_blob*) * VEC0_MAX_METADATA_COLUMNS); + + bmMetadata = bitmap_new(p->chunk_size); + if(!bmMetadata) { + rc = SQLITE_NOMEM; + goto cleanup; + } + + int idxStrLength = strlen(idxStr); + int numValueEntries = (idxStrLength-1) / 4; + assert(numValueEntries == argc); + int hasMetadataFilters = 0; + for(int i = 0; i < argc; i++) { + int idx = 1 + (i * 4); + char kind = idxStr[idx + 0]; + if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) { + hasMetadataFilters = 1; + break; + } + } + while (true) { rc = sqlite3_step(stmtChunks); if (rc == SQLITE_DONE) { @@ -5565,6 +6655,37 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks, bitmap_and_inplace(b, bmRowids, p->chunk_size); } + if(hasMetadataFilters) { + for(int i = 0; i < argc; i++) { + int idx = 1 + (i * 4); + char kind = idxStr[idx + 0]; + if(kind != 
VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) { + continue; + } + int metadata_idx = idxStr[idx + 1] - 'A'; + int operator = idxStr[idx + 2]; + + if(!metadataBlobs[metadata_idx]) { + rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &metadataBlobs[metadata_idx]); + vtab_set_error(&p->base, "Could not open metadata blob"); + if(rc != SQLITE_OK) { + goto cleanup; + } + } + + bitmap_clear(bmMetadata, p->chunk_size); + rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i); + if(rc != SQLITE_OK) { + vtab_set_error(&p->base, "Could not filter metadata fields"); + if(rc != SQLITE_OK) { + goto cleanup; + } + } + bitmap_and_inplace(b, bmMetadata, p->chunk_size); + } + } + + for (int i = 0; i < p->chunk_size; i++) { if (!bitmap_get(b, i)) { continue; @@ -5668,6 +6789,10 @@ cleanup: sqlite3_free(bmRowids); sqlite3_free(baseVectors); sqlite3_free(chunk_distances); + sqlite3_free(bmMetadata); + for(int i = 0; i < VEC0_MAX_METADATA_COLUMNS; i++) { + sqlite3_blob_close(metadataBlobs[i]); + } // blobVectors is always opened with read-only permissions, so this never // fails. 
sqlite3_blob_close(blobVectors); @@ -5696,6 +6821,8 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, return SQLITE_NOMEM; } memset(knn_data, 0, sizeof(*knn_data)); + // array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints + struct Array * aMetadataIn = NULL; int query_idx =-1; int k_idx = -1; @@ -5815,6 +6942,95 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, } #endif + #if COMPILER_SUPPORTS_VTAB_IN + for(int i = 0; i < argc; i++) { + if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) { + continue; + } + int metadata_idx = idxStr[1 + (i*4) + 1] - 'A'; + if(!aMetadataIn) { + aMetadataIn = sqlite3_malloc(sizeof(*aMetadataIn)); + if(!aMetadataIn) { + rc = SQLITE_NOMEM; + goto cleanup; + } + memset(aMetadataIn, 0, sizeof(*aMetadataIn)); + rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8); + if(rc != SQLITE_OK) { + goto cleanup; + } + } + + struct Vec0MetadataIn item; + memset(&item, 0, sizeof(item)); + item.metadata_idx=metadata_idx; + item.argv_idx = i; + + switch(p->metadata_columns[metadata_idx].kind) { + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + rc = array_init(&item.array, sizeof(i64), 16); + if(rc != SQLITE_OK) { + goto cleanup; + } + sqlite3_value *entry; + for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) { + i64 v = sqlite3_value_int64(entry); + rc = array_append(&item.array, &v); + if (rc != SQLITE_OK) { + goto cleanup; + } + } + + if (rc != SQLITE_DONE) { + vtab_set_error(&p->base, "Error fetching next value in `x in (...)` integer expression"); + goto cleanup; + } + + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16); + if(rc != SQLITE_OK) { + goto cleanup; + } + sqlite3_value *entry; + for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; 
rc = sqlite3_vtab_in_next(argv[i], &entry)) { + const char * s = (const char *) sqlite3_value_text(entry); + int n = sqlite3_value_bytes(entry); + + struct Vec0MetadataInTextEntry entry; + entry.zString = sqlite3_mprintf("%.*s", n, s); + if(!entry.zString) { + rc = SQLITE_NOMEM; + goto cleanup; + } + entry.n = n; + rc = array_append(&item.array, &entry); + if (rc != SQLITE_OK) { + goto cleanup; + } + } + + if (rc != SQLITE_DONE) { + vtab_set_error(&p->base, "Error fetching next value in `x in (...)` text expression"); + goto cleanup; + } + + break; + } + default: { + vtab_set_error(&p->base, "Internal sqlite-vec error"); + goto cleanup; + } + } + + rc = array_append(aMetadataIn, &item); + if(rc != SQLITE_OK) { + goto cleanup; + } + } + #endif + rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks); if (rc != SQLITE_OK) { // IMP: V06942_23781 @@ -5827,7 +7043,7 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, f32 *topk_distances = NULL; i64 k_used = 0; rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx, - arrayRowidsIn, queryVector, k, &topk_rowids, + arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids, &topk_distances, &k_used); if (rc != SQLITE_OK) { goto cleanup; @@ -5848,6 +7064,21 @@ cleanup: array_cleanup(arrayRowidsIn); sqlite3_free(arrayRowidsIn); queryVectorCleanup(queryVector); + if(aMetadataIn) { + for(size_t i = 0; i < aMetadataIn->length; i++) { + struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i]; + for(size_t j = 0; j < item->array.length; j++) { + if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) { + struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j]; + sqlite3_free(entry.zString); + } + } + array_cleanup(&item->array); + } + array_cleanup(aMetadataIn); + } + + sqlite3_free(aMetadataIn); return rc; } @@ -6119,6 +7350,29 @@ static int vec0Column_fullscan(vec0_vtab *pVtab, 
vec0_cursor *pCur, sqlite3_result_error_code(context, rc); } } + + else if(vec0_column_idx_is_metadata(pVtab, i)) { + if(sqlite3_vtab_nochange(context)) { + return SQLITE_OK; + } + int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); + int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); + if(rc != SQLITE_OK) { + // IMP: V15466_32305 + const char * zErr = sqlite3_mprintf( + "Could not extract metadata value for column %.*s at rowid %lld", + pVtab->metadata_columns[metadata_idx].name_length, + pVtab->metadata_columns[metadata_idx].name, rowid + ); + if(zErr) { + sqlite3_result_error(context, zErr, -1); + sqlite3_free((void *) zErr); + }else { + sqlite3_result_error_nomem(context); + } + } + } + return SQLITE_OK; } @@ -6181,6 +7435,28 @@ static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur, } } + else if(vec0_column_idx_is_metadata(pVtab, i)) { + if(sqlite3_vtab_nochange(context)) { + return SQLITE_OK; + } + i64 rowid = pCur->point_data->rowid; + int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); + int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); + if(rc != SQLITE_OK) { + const char * zErr = sqlite3_mprintf( + "Could not extract metadata value for column %.*s at rowid %lld", + pVtab->metadata_columns[metadata_idx].name_length, + pVtab->metadata_columns[metadata_idx].name, rowid + ); + if(zErr) { + sqlite3_result_error(context, zErr, -1); + sqlite3_free((void *) zErr); + }else { + sqlite3_result_error_nomem(context); + } + } + } + return SQLITE_OK; } @@ -6240,6 +7516,25 @@ static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur, } } + else if(vec0_column_idx_is_metadata(pVtab, i)) { + int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i); + i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; + int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context); + if(rc != SQLITE_OK) { + const char * zErr = sqlite3_mprintf( + "Could 
not extract metadata value for column %.*s at rowid %lld", + pVtab->metadata_columns[metadata_idx].name_length, + pVtab->metadata_columns[metadata_idx].name, rowid + ); + if(zErr) { + sqlite3_result_error(context, zErr, -1); + sqlite3_free((void *) zErr); + }else { + sqlite3_result_error_nomem(context); + } + } + } + return SQLITE_OK; } @@ -6654,6 +7949,160 @@ cleanup: return rc; } +int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid, i64 chunk_id, i64 chunk_offset, sqlite3_value * v, int isupdate) { + int rc; + struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx]; + vec0_metadata_column_kind kind = metadata_column->kind; + + // verify input value matches column type + switch(kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + if(sqlite3_value_type(v) != SQLITE_INTEGER || ((sqlite3_value_int(v) != 0) && (sqlite3_value_int(v) != 1))) { + rc = SQLITE_ERROR; + vtab_set_error(&p->base, "Expected 0 or 1 for BOOLEAN metadata column %.*s", metadata_column->name_length, metadata_column->name); + goto done; + } + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + if(sqlite3_value_type(v) != SQLITE_INTEGER) { + rc = SQLITE_ERROR; + vtab_set_error(&p->base, "Expected integer for INTEGER metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v))); + goto done; + } + break; + } + case VEC0_METADATA_COLUMN_KIND_FLOAT: { + if(sqlite3_value_type(v) != SQLITE_FLOAT) { + rc = SQLITE_ERROR; + vtab_set_error(&p->base, "Expected float for FLOAT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v))); + goto done; + } + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + if(sqlite3_value_type(v) != SQLITE_TEXT) { + rc = SQLITE_ERROR; + vtab_set_error(&p->base, "Expected text for TEXT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, 
type_name(sqlite3_value_type(v))); + goto done; + } + break; + } + } + + sqlite3_blob * blobValue = NULL; + rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", chunk_id, 1, &blobValue); + if(rc != SQLITE_OK) { + goto done; + } + + switch(kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + u8 block; + int value = sqlite3_value_int(v); + rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT)); + if(rc != SQLITE_OK) { + goto done; + } + + if (value) { + block |= 1 << (chunk_offset % CHAR_BIT); + } else { + block &= ~(1 << (chunk_offset % CHAR_BIT)); + } + + rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT); + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + i64 value = sqlite3_value_int64(v); + rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64)); + break; + } + case VEC0_METADATA_COLUMN_KIND_FLOAT: { + double value = sqlite3_value_double(v); + rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(double)); + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + int prev_n; + rc = sqlite3_blob_read(blobValue, &prev_n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + if(rc != SQLITE_OK) { + goto done; + } + + const char * s = (const char *) sqlite3_value_text(v); + int n = sqlite3_value_bytes(v); + u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + memcpy(view, &n, sizeof(int)); + memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4)); + + rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + const char * zSql; + + if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)) { + zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " SET data = ?2 WHERE 
rowid = ?1", p->schemaName, p->tableName, metadata_column_idx); + }else { + zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx); + } + if(!zSql) { + rc = SQLITE_NOMEM; + goto done; + } + sqlite3_stmt * stmt; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); + if(rc != SQLITE_OK) { + goto done; + } + sqlite3_bind_int64(stmt, 1, rowid); + sqlite3_bind_text(stmt, 2, s, n, SQLITE_STATIC); + rc = sqlite3_step(stmt); + sqlite3_finalize(stmt); + + if(rc != SQLITE_DONE) { + rc = SQLITE_ERROR; + goto done; + } + } + else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx); + if(!zSql) { + rc = SQLITE_NOMEM; + goto done; + } + sqlite3_stmt * stmt; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); + if(rc != SQLITE_OK) { + goto done; + } + sqlite3_bind_int64(stmt, 1, rowid); + rc = sqlite3_step(stmt); + sqlite3_finalize(stmt); + + if(rc != SQLITE_DONE) { + rc = SQLITE_ERROR; + goto done; + } + } + break; + } + } + + if(rc != SQLITE_OK) { + + } + rc = sqlite3_blob_close(blobValue); + if(rc != SQLITE_OK) { + goto done; + } + + done: + return rc; +} + + /** * @brief Handles INSERT INTO operations on a vec0 table. 
* @@ -6713,67 +8162,6 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, } } - if(p->numAuxiliaryColumns > 0) { - sqlite3_stmt *stmt; - sqlite3_str * s = sqlite3_str_new(NULL); - sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME "(", p->schemaName, p->tableName); - for(int i = 0; i < p->numAuxiliaryColumns; i++) { - if(i!=0) { - sqlite3_str_appendchar(s, 1, ','); - } - sqlite3_str_appendf(s, "value%02d", i); - } - sqlite3_str_appendall(s, ") VALUES ("); - for(int i = 0; i < p->numAuxiliaryColumns; i++) { - if(i!=0) { - sqlite3_str_appendchar(s, 1, ','); - } - sqlite3_str_appendchar(s, 1, '?'); - } - sqlite3_str_appendall(s, ")"); - char * zSql = sqlite3_str_finish(s); - // TODO double check error handling ehre - if(!zSql) { - rc = SQLITE_NOMEM; - goto cleanup; - } - rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); - if(rc != SQLITE_OK) { - goto cleanup; - } - - for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { - if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) { - continue; - } - int auxiliary_key_idx = p->user_column_idxs[i]; - sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START + i]; - int v_type = sqlite3_value_type(v); - if(v_type != SQLITE_NULL && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) { - sqlite3_finalize(stmt); - rc = SQLITE_ERROR; - vtab_set_error( - pVTab, - "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.", - p->auxiliary_columns[auxiliary_key_idx].name_length, - p->auxiliary_columns[auxiliary_key_idx].name, - type_name(p->auxiliary_columns[auxiliary_key_idx].type), - type_name(v_type) - ); - goto cleanup; - } - sqlite3_bind_value(stmt, 1 + auxiliary_key_idx, v); - } - - rc = sqlite3_step(stmt); - if(rc != SQLITE_DONE) { - sqlite3_finalize(stmt); - rc = SQLITE_ERROR; - goto cleanup; - } - sqlite3_finalize(stmt); - } - // read all the inserted vectors into vectorDatas, validate their lengths. 
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) { @@ -6865,6 +8253,76 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, goto cleanup; } + if(p->numAuxiliaryColumns > 0) { + sqlite3_stmt *stmt; + sqlite3_str * s = sqlite3_str_new(NULL); + sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME "(rowid ", p->schemaName, p->tableName); + for(int i = 0; i < p->numAuxiliaryColumns; i++) { + sqlite3_str_appendf(s, ", value%02d", i); + } + sqlite3_str_appendall(s, ") VALUES (? "); + for(int i = 0; i < p->numAuxiliaryColumns; i++) { + sqlite3_str_appendall(s, ", ?"); + } + sqlite3_str_appendall(s, ")"); + char * zSql = sqlite3_str_finish(s); + // TODO double check error handling ehre + if(!zSql) { + rc = SQLITE_NOMEM; + goto cleanup; + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); + if(rc != SQLITE_OK) { + goto cleanup; + } + sqlite3_bind_int64(stmt, 1, rowid); + + for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { + if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) { + continue; + } + int auxiliary_key_idx = p->user_column_idxs[i]; + sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START + i]; + int v_type = sqlite3_value_type(v); + if(v_type != SQLITE_NULL && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) { + sqlite3_finalize(stmt); + rc = SQLITE_CONSTRAINT; + vtab_set_error( + pVTab, + "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.", + p->auxiliary_columns[auxiliary_key_idx].name_length, + p->auxiliary_columns[auxiliary_key_idx].name, + type_name(p->auxiliary_columns[auxiliary_key_idx].type), + type_name(v_type) + ); + goto cleanup; + } + // first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to account for initial rowid parameter + sqlite3_bind_value(stmt, 1 + 1 + auxiliary_key_idx, v); + } + + rc = sqlite3_step(stmt); + if(rc != SQLITE_DONE) { + 
sqlite3_finalize(stmt); + rc = SQLITE_ERROR; + goto cleanup; + } + sqlite3_finalize(stmt); + } + + + for(int i = 0; i < vec0_num_defined_user_columns(p); i++) { + if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { + continue; + } + int metadata_idx = p->user_column_idxs[i]; + sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START + i]; + rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid, chunk_offset, v, 0); + if(rc != SQLITE_OK) { + goto cleanup; + } + } + *pRowid = rowid; rc = SQLITE_OK; @@ -7002,6 +8460,83 @@ cleanup: return rc; } +int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id, + u64 chunk_offset) { + int rc; + sqlite3_blob * blobValue; + vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind; + rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 1, &blobValue); + if(rc != SQLITE_OK) { + return rc; + } + + switch(kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + u8 block; + rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT)); + if(rc != SQLITE_OK) { + goto done; + } + + block &= ~(1 << (chunk_offset % CHAR_BIT)); + rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT); + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + i64 v = 0; + rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(i64)); + break; + } + case VEC0_METADATA_COLUMN_KIND_FLOAT: { + double v = 0; + rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(double)); + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + int n; + rc = sqlite3_blob_read(blobValue, &n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + if(rc != SQLITE_OK) { + goto done; + } + + u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + rc = sqlite3_blob_write(blobValue, &view, sizeof(view), 
chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + if(rc != SQLITE_OK) { + goto done; + } + + if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx); + if(!zSql) { + rc = SQLITE_NOMEM; + goto done; + } + sqlite3_stmt * stmt; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); + if(rc != SQLITE_OK) { + goto done; + } + sqlite3_bind_int64(stmt, 1, rowid); + rc = sqlite3_step(stmt); + if(rc != SQLITE_DONE) { + rc = SQLITE_ERROR; + goto done; + } + sqlite3_finalize(stmt); + } + break; + } + } + int rc2; + done: + rc2 = sqlite3_blob_close(blobValue); + if(rc == SQLITE_OK) { + return rc2; + } + return rc; +} + int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) { vec0_vtab *p = (vec0_vtab *)pVTab; int rc; @@ -7055,6 +8590,11 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) { } } + // 6. delete metadata + for(int i = 0; i < p->numMetadataColumns; i++) { + rc = vec0Update_Delete_ClearMetadata(p, i, rowid, chunk_id, chunk_offset); + } + return SQLITE_OK; } @@ -7195,7 +8735,6 @@ int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) { if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) { continue; } - int partition_key_idx = p->user_column_idxs[i]; sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i]; if(sqlite3_value_nochange(value)) { continue; @@ -7220,7 +8759,23 @@ int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) { } } - // 4) iterate over all new vectors, update the vectors + // 4) handle metadata column updates + for (int i = 0; i < vec0_num_defined_user_columns(p); i++) { + if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) { + continue; + } + int metadata_column_idx = p->user_column_idxs[i]; + sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i]; + if(sqlite3_value_nochange(value)) { + 
/**
 * Returns 1 when zName is zPrefix (compared case-insensitively) followed by
 * exactly two decimal digits that spell an index from 00 through 15, and 0
 * otherwise.
 */
static int vec0ShadowNameIsIndexed(const char *zName, const char *zPrefix) {
  size_t nPrefix = strlen(zPrefix);
  const char *zIndex;

  if (sqlite3_strnicmp(zName, zPrefix, (int)nPrefix) != 0) {
    return 0;
  }

  // The prefix matched in full, so zName is at least nPrefix bytes long. The
  // checks below short-circuit at the first non-digit (including the NUL
  // terminator) and therefore never read past the end of the string.
  zIndex = zName + nPrefix;
  if (zIndex[0] < '0' || zIndex[0] > '9') {
    return 0;
  }
  if (zIndex[1] < '0' || zIndex[1] > '9') {
    return 0;
  }
  if (zIndex[2] != '\0') {
    return 0;
  }

  // Suffixes 00 through 15 are recognised.
  return ((zIndex[0] - '0') * 10 + (zIndex[1] - '0')) < 16;
}

/**
 * Reports whether zName is the suffix of one of the recognised vec0 shadow
 * table names: "rowids", "chunks", "auxiliary", "info", or
 * "metadatachunksNN" / "metadatatextNN" with NN from 00 through 15.
 * Comparison is case-insensitive. Returns 1 on a match and 0 otherwise.
 */
static int vec0ShadowName(const char *zName) {
  static const char *const azFixed[] = {"rowids", "chunks", "auxiliary", "info"};
  size_t idx = 0;

  while (idx < sizeof(azFixed) / sizeof(azFixed[0])) {
    if (sqlite3_stricmp(zName, azFixed[idx]) == 0) {
      return 1;
    }
    idx++;
  }

  if (vec0ShadowNameIsIndexed(zName, "metadatachunks")) {
    return 1;
  }
  return vec0ShadowNameIsIndexed(zName, "metadatatext");
}
${VERSION_MAJOR} +#define SQLITE_VEC_VERSION_MINOR ${VERSION_MINOR} +#define SQLITE_VEC_VERSION_PATCH ${VERSION_PATCH} + #ifdef __cplusplus extern "C" { #endif diff --git a/test.sql b/test.sql index e9a64a8..9d615a7 100644 --- a/test.sql +++ b/test.sql @@ -1,10 +1,333 @@ -.load dist/vec0 -.echo on +.load dist/vec0main .bail on .mode qbox + +.load ./memstat +.echo on + +select name, value from sqlite_memstat where name = 'MEMORY_USED'; + +create virtual table v using vec0( + vector float[1], + name1 text, + name2 text, + age int, + chunk_size=8 +); + +select name, value from sqlite_memstat where name = 'MEMORY_USED'; + +insert into v(vector, name1, name2, age) values + ('[1]', 'alex', 'xxxx', 1), + ('[2]', 'alex', 'aaaa', 2), + ('[3]', 'alex', 'aaaa', 3), + ('[4]', 'brian', 'aaaa', 1), + ('[5]', 'brian', 'aaaa', 2), + ('[6]', 'brian', 'aaaa', 3), + ('[7]', 'craig', 'aaaa', 1), + ('[8]', 'craig', 'xxxx', 2), + ('[9]', 'craig', 'xxxx', 3), + ('[10]', '123456789012345', 'xxxx', 3); + +select name, value from sqlite_memstat where name = 'MEMORY_USED'; + +select rowid, name1, name2, age, vec_to_json(vector) +from v +where vector match '[0]' + and k = 5 + and name1 in ('alex', 'brian', 'craig') + --and name2 in ('aaaa', 'xxxx') + and age in (1, 2, 3, 2222,3333,4444); + +select name, value from sqlite_memstat where name = 'MEMORY_USED'; + +select rowid, name1, name2, age, vec_to_json(vector) +from v +where vector match '[0]' + and k = 5 + and name1 in ('123456789012345', 'superfluous'); + + +.exit + +create virtual table v using vec0( + vector float[1], + +description text +); +insert into v(rowid, vector, description) values (1, '[1]', 'aaa'); +select * from v; + +.exit + +create virtual table vec_articles using vec0( + article_id integer primary key, + year integer partition key, + headline_embedding float[1], + +headline text, + +url text, + word_count integer, + print_section text, + print_page integer, + pub_date text, +); + +insert into vec_articles values (1111, 
2020, '[1]', 'headline', 'https://...', 200, 'A', 1, '2020-01-01'); + +select * from vec_articles; + +.exit + + +create table movies(movie_id integer primary key, synopsis text); +INSERT INTO movies(movie_id, synopsis) +VALUES + (1, 'A family is haunted by demonic spirits after moving into a new house, requiring the help of paranormal investigators.'), + (2, 'Two dim-witted friends embark on a cross-country road trip to return a briefcase full of money to its owner.'), + (3, 'A team of explorers travels through a wormhole in space in an attempt to ensure humanity’s survival.'), + (4, 'A young hobbit embarks on a journey with a fellowship to destroy a powerful ring and save Middle-earth from darkness.'), + (5, 'A documentary about the dangers of global warming, featuring former U.S. Vice President Al Gore.'), + (6, 'After the death of her secretive mother, a woman discovers terrifying secrets about her family lineage.'), + (7, 'A clueless but charismatic TV anchorman struggles to stay relevant in the world of broadcast journalism.'), + (8, 'A young blade runner uncovers a long-buried secret that leads him to track down former blade runner Rick Deckard.'), + (9, 'A young boy discovers he is a wizard and attends a magical school, where he learns about his destiny.'), + (10, 'A rock climber attempts to scale El Capitan in Yosemite National Park without the use of ropes or safety gear.'), + (11, 'A young African-American man uncovers a disturbing secret when he visits his white girlfriend''s family estate.'), + (12, 'Three friends wake up from a bachelor party in Las Vegas with no memory of the previous night and must retrace their steps.'), + (13, 'A computer hacker learns about the true nature of his reality and his role in the war against its controllers.'), + (14, 'In post-Civil War Spain, a young girl escapes into an eerie but captivating fantasy world.'), + (15, 'A documentary that explores racial inequality in the United States, focusing on the prison system and 
mass incarceration.'), + (16, 'A young woman is followed by an unknown supernatural force after a sexual encounter.'), + (17, 'Two immature but well-meaning stepbrothers become instant rivals when their single parents marry.'), + (18, 'A thief with the ability to enter people''s dreams is tasked with planting an idea into a target''s subconscious.'), + (19, 'A mute woman forms a unique relationship with a mysterious aquatic creature being held in a secret research facility.'), + (20, 'A documentary about the life and legacy of Fred Rogers, the beloved host of the children''s TV show "Mister Rogers'' Neighborhood."'); + + +create virtual table vec_movies using vec0( + movie_id integer primary key, + synopsis_embedding float[1], + +title text, + genre text, + num_reviews int, + mean_rating float, + chunk_size=8 +); + +.schema +/* +insert into vec_movies(movie_id, synopsis_embedding, num_reviews, mean_rating) values + (1, '[1]', 153, 4.6), + (2, '[2]', 382, 2.6), + (3, '[3]', 53, 5.0), + (4, '[4]', 210, 4.2), + (5, '[5]', 93, 3.4), + (6, '[6]', 167, 4.7), + (7, '[7]', 482, 2.9), + (8, '[8]', 301, 5.0), + (9, '[9]', 134, 4.1), + (10, '[10]', 66, 3.2), + (11, '[11]', 88, 4.9), + (12, '[12]', 59, 2.8), + (13, '[13]', 423, 4.5), + (14, '[14]', 275, 3.6), + (15, '[15]', 191, 4.4), + (16, '[16]', 314, 4.3), + (17, '[17]', 74, 3.0), + (18, '[18]', 201, 5.0), + (19, '[19]', 399, 2.7), + (20, '[20]', 186, 4.8); +*/ + +/* + +INSERT INTO vec_movies(movie_id, synopsis_embedding, genre, num_reviews, mean_rating) +VALUES + (1, '[1]', 'horror', 153, 4.6), + (2, '[2]', 'comedy', 382, 2.6), + (3, '[3]', 'scifi', 53, 5.0), + (4, '[4]', 'fantasy', 210, 4.2), + (5, '[5]', 'documentary', 93, 3.4), + (6, '[6]', 'horror', 167, 4.7), + (7, '[7]', 'comedy', 482, 2.9), + (8, '[8]', 'scifi', 301, 5.0), + (9, '[9]', 'fantasy', 134, 4.1), + (10, '[10]', 'documentary', 66, 3.2), + (11, '[11]', 'horror', 88, 4.9), + (12, '[12]', 'comedy', 59, 2.8), + (13, '[13]', 'scifi', 423, 4.5), + (14, '[14]', 
'fantasy', 275, 3.6), + (15, '[15]', 'documentary', 191, 4.4), + (16, '[16]', 'horror', 314, 4.3), + (17, '[17]', 'comedy', 74, 3.0), + (18, '[18]', 'scifi', 201, 5.0), + (19, '[19]', 'fantasy', 399, 2.7), + (20, '[20]', 'documentary', 186, 4.8); +*/ + +INSERT INTO vec_movies(movie_id, synopsis_embedding, genre, title, num_reviews, mean_rating) +VALUES + (1, '[1]', 'horror', 'The Conjuring', 153, 4.6), + (2, '[2]', 'comedy', 'Dumb and Dumber', 382, 2.6), + (3, '[3]', 'scifi', 'Interstellar', 53, 5.0), + (4, '[4]', 'fantasy', 'The Lord of the Rings: The Fellowship of the Ring', 210, 4.2), + (5, '[5]', 'documentary', 'An Inconvenient Truth', 93, 3.4), + (6, '[6]', 'horror', 'Hereditary', 167, 4.7), + (7, '[7]', 'comedy', 'Anchorman: The Legend of Ron Burgundy', 482, 2.9), + (8, '[8]', 'scifi', 'Blade Runner 2049', 301, 5.0), + (9, '[9]', 'fantasy', 'Harry Potter and the Sorcerer''s Stone', 134, 4.1), + (10, '[10]', 'documentary', 'Free Solo', 66, 3.2), + (11, '[11]', 'horror', 'Get Out', 88, 4.9), + (12, '[12]', 'comedy', 'The Hangover', 59, 2.8), + (13, '[13]', 'scifi', 'The Matrix', 423, 4.5), + (14, '[14]', 'fantasy', 'Pan''s Labyrinth', 275, 3.6), + (15, '[15]', 'documentary', '13th', 191, 4.4), + (16, '[16]', 'horror', 'It Follows', 314, 4.3), + (17, '[17]', 'comedy', 'Step Brothers', 74, 3.0), + (18, '[18]', 'scifi', 'Inception', 201, 5.0), + (19, '[19]', 'fantasy', 'The Shape of Water', 399, 2.7), + (20, '[20]', 'documentary', 'Won''t You Be My Neighbor?', 186, 4.8), + (21, '[21]', 'scifi', 'Gravity', 342, 4.0), + (22, '[22]', 'scifi', 'Dune', 451, 4.4), + (23, '[23]', 'scifi', 'The Martian', 522, 4.6), + (24, '[24]', 'horror', 'A Quiet Place', 271, 4.3), + (25, '[25]', 'fantasy', 'The Chronicles of Narnia: The Lion, the Witch and the Wardrobe', 310, 3.9); + +--select * from vec_movies; +--select * from vec_movies_metadata_chunks00; + + +create virtual table vec_chunks using vec0( + user_id integer partition key, + +contents text, + contents_embedding 
float[1], +); + +INSERT INTO vec_chunks (rowid, user_id, contents, contents_embedding) VALUES +(1, 123, 'Our PTO policy allows employees to take both vacation and sick leave as needed.', '[1]'), +(2, 123, 'Employees must provide notice at least two weeks in advance for planned vacations.', '[2]'), +(3, 123, 'Sick leave can be taken without advance notice, but employees must inform their manager.', '[3]'), +(4, 123, 'Unused PTO can be carried over to the following year, up to a maximum of 40 hours.', '[4]'), +(5, 123, 'PTO must be used in increments of at least 4 hours.', '[5]'), +(6, 456, 'New employees are granted 10 days of PTO during their first year of employment.', '[6]'), +(7, 456, 'After the first year, employees earn an additional day of PTO for each year of service.', '[7]'), +(8, 789, 'PTO requests will be reviewed by the HR department and are subject to approval.', '[8]'), +(9, 789, 'The company reserves the right to deny PTO requests during peak operational periods.', '[9]'), +(10, 456, 'If PTO is denied, the employee will be given an alternative time to take leave.', '[10]'), +(11, 789, 'Employees who are out of PTO must request unpaid leave for any additional time off.', '[11]'), +(12, 789, 'In case of a family emergency, employees can request emergency leave.', '[12]'), +(13, 456, 'Emergency leave may be granted for personal or family illness, or other critical situations.', '[13]'), +(14, 789, 'The maximum length of emergency leave is subject to company discretion.', '[14]'), +(15, 123, 'All PTO balances will be displayed on the employee self-service portal.', '[15]'), +(16, 456, 'Employees who are terminated will be paid for unused PTO, as per state law.', '[16]'), +(17, 123, 'Part-time employees are eligible for PTO on a pro-rata basis.', '[17]'), +(18, 789, 'The company encourages employees to use their PTO to maintain work-life balance.', '[18]'), +(19, 456, 'Employees should not book travel plans until their PTO request has been approved.', 
'[19]'), +(20, 123, 'Managers are responsible for tracking their team members'' PTO usage.', '[20]'); + +select rowid, user_id, contents, distance +from vec_chunks +where contents_embedding match '[19]' + and user_id = 123 + and k = 5; + +.exit + + + + + +-- PARTITION KEY and auxiliar columns! +create virtual table vec_chunks using vec0( + -- internally shard the vector index by user + user_id integer partition key, + -- store the chunk text pre-embedding as an "auxiliary column" + +contents text, + contents_embeddings float[1024], +); + +select rowid, user_id, contents, distance +from vec_chunks +where contents_embedding match '[...]' + and user_id = 123 + and k = 5; +/* +┌───────┬─────────┬──────────────────────────────────────────────────────────────┬──────────┐ +│ rowid │ user_id │ contents │ distance │ +├───────┼─────────┼──────────────────────────────────────────────────────────────┼──────────┤ +│ 20 │ 123 │ 'Managers are responsible for tracking their team members'' │ 1.0 │ +│ │ │ PTO usage.' │ │ +├───────┼─────────┼──────────────────────────────────────────────────────────────┼──────────┤ +│ 17 │ 123 │ 'Part-time employees are eligible for PTO on a pro-rata basi │ 2.0 │ +│ │ │ s.' │ │ +├───────┼─────────┼──────────────────────────────────────────────────────────────┼──────────┤ +│ 15 │ 123 │ 'All PTO balances will be displayed on the employee self-ser │ 4.0 │ +│ │ │ vice portal.' │ │ +├───────┼─────────┼──────────────────────────────────────────────────────────────┼──────────┤ +│ 5 │ 123 │ 'PTO must be used in increments of at least 4 hours.' │ 14.0 │ +├───────┼─────────┼──────────────────────────────────────────────────────────────┼──────────┤ +│ 4 │ 123 │ 'Unused PTO can be carried over to the following year, up to │ 15.0 │ +│ │ │ a maximum of 40 hours.' │ │ +└───────┴─────────┴──────────────────────────────────────────────────────────────┴──────────┘ +*/ + + + + + +-- metadata filters! 
+create virtual table vec_movies using vec0( + movie_id integer primary key, + synopsis_embedding float[1024], + genre text, + num_reviews int, + mean_rating float +); + +select + movie_id, + title, + genre, + num_reviews, + mean_rating, + distance +from vec_movies +where synopsis_embedding match '[15.5]' + and genre = 'scifi' + and num_reviews between 100 and 500 + and mean_rating > 3.5 + and k = 5; +/* +┌──────────┬─────────────────────┬─────────┬─────────────┬──────────────────┬──────────┐ +│ movie_id │ title │ genre │ num_reviews │ mean_rating │ distance │ +├──────────┼─────────────────────┼─────────┼─────────────┼──────────────────┼──────────┤ +│ 13 │ 'The Matrix' │ 'scifi' │ 423 │ 4.5 │ 2.5 │ +│ 18 │ 'Inception' │ 'scifi' │ 201 │ 5.0 │ 2.5 │ +│ 21 │ 'Gravity' │ 'scifi' │ 342 │ 4.0 │ 5.5 │ +│ 22 │ 'Dune' │ 'scifi' │ 451 │ 4.40000009536743 │ 6.5 │ +│ 8 │ 'Blade Runner 2049' │ 'scifi' │ 301 │ 5.0 │ 7.5 │ +└──────────┴─────────────────────┴─────────┴─────────────┴──────────────────┴──────────┘ +*/ + + + + +.exit + +create virtual table vec_movies using vec0( + movie_id integer primary key, + synopsis_embedding float[768], + genre text, + num_reviews int, + mean_rating float, +); + + +.exit + + create virtual table vec_chunks using vec0( chunk_id integer primary key, contents_embedding float[1], diff --git a/tests/__snapshots__/test-auxiliary.ambr b/tests/__snapshots__/test-auxiliary.ambr index eb84f0f..bfe3d2c 100644 --- a/tests/__snapshots__/test-auxiliary.ambr +++ b/tests/__snapshots__/test-auxiliary.ambr @@ -316,7 +316,7 @@ 'type': 'table', 'name': 'sqlite_sequence', 'tbl_name': 'sqlite_sequence', - 'rootpage': 3, + 'rootpage': 5, 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', }), ]), @@ -326,18 +326,25 @@ OrderedDict({ 'sql': 'select * from sqlite_master order by name', 'rows': list([ + OrderedDict({ + 'type': 'index', + 'name': 'sqlite_autoindex_v_info_1', + 'tbl_name': 'v_info', + 'rootpage': 3, + 'sql': None, + }), OrderedDict({ 'type': 'index', 'name': 
'sqlite_autoindex_v_vector_chunks00_1', 'tbl_name': 'v_vector_chunks00', - 'rootpage': 6, + 'rootpage': 8, 'sql': None, }), OrderedDict({ 'type': 'table', 'name': 'sqlite_sequence', 'tbl_name': 'sqlite_sequence', - 'rootpage': 3, + 'rootpage': 5, 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', }), OrderedDict({ @@ -351,28 +358,35 @@ 'type': 'table', 'name': 'v_auxiliary', 'tbl_name': 'v_auxiliary', - 'rootpage': 7, + 'rootpage': 9, 'sql': 'CREATE TABLE "v_auxiliary"( rowid integer PRIMARY KEY , value00)', }), OrderedDict({ 'type': 'table', 'name': 'v_chunks', 'tbl_name': 'v_chunks', - 'rootpage': 2, + 'rootpage': 4, 'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)', }), + OrderedDict({ + 'type': 'table', + 'name': 'v_info', + 'tbl_name': 'v_info', + 'rootpage': 2, + 'sql': 'CREATE TABLE "v_info" (key text primary key, value any)', + }), OrderedDict({ 'type': 'table', 'name': 'v_rowids', 'tbl_name': 'v_rowids', - 'rootpage': 4, + 'rootpage': 6, 'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)', }), OrderedDict({ 'type': 'table', 'name': 'v_vector_chunks00', 'tbl_name': 'v_vector_chunks00', - 'rootpage': 5, + 'rootpage': 7, 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', }), ]), @@ -409,25 +423,25 @@ # --- # name: test_types.3 dict({ - 'error': 'OperationalError', + 'error': 'IntegrityError', 'message': 'Auxiliary column type mismatch: The auxiliary column aux_int has type INTEGER, but TEXT was provided.', }) # --- # name: test_types.4 dict({ - 'error': 'OperationalError', + 'error': 'IntegrityError', 'message': 'Auxiliary column type mismatch: The auxiliary column aux_float has type FLOAT, but TEXT was provided.', }) # --- # name: test_types.5 dict({ - 'error': 'OperationalError', + 'error': 'IntegrityError', 'message': 'Auxiliary column type mismatch: The auxiliary column 
aux_text has type TEXT, but INTEGER was provided.', }) # --- # name: test_types.6 dict({ - 'error': 'OperationalError', + 'error': 'IntegrityError', 'message': 'Auxiliary column type mismatch: The auxiliary column aux_blob has type BLOB, but INTEGER was provided.', }) # --- diff --git a/tests/__snapshots__/test-general.ambr b/tests/__snapshots__/test-general.ambr new file mode 100644 index 0000000..0eac460 --- /dev/null +++ b/tests/__snapshots__/test-general.ambr @@ -0,0 +1,184 @@ +# serializer version: 1 +# name: test_info + OrderedDict({ + 'sql': 'select key, typeof(value) from v_info order by 1', + 'rows': list([ + OrderedDict({ + 'key': 'CREATE_VERSION', + 'typeof(value)': 'text', + }), + OrderedDict({ + 'key': 'CREATE_VERSION_MAJOR', + 'typeof(value)': 'integer', + }), + OrderedDict({ + 'key': 'CREATE_VERSION_MINOR', + 'typeof(value)': 'integer', + }), + OrderedDict({ + 'key': 'CREATE_VERSION_PATCH', + 'typeof(value)': 'integer', + }), + ]), + }) +# --- +# name: test_shadow + OrderedDict({ + 'sql': 'select * from sqlite_master order by name', + 'rows': list([ + OrderedDict({ + 'type': 'index', + 'name': 'sqlite_autoindex_v_info_1', + 'tbl_name': 'v_info', + 'rootpage': 3, + 'sql': None, + }), + OrderedDict({ + 'type': 'index', + 'name': 'sqlite_autoindex_v_metadatachunks00_1', + 'tbl_name': 'v_metadatachunks00', + 'rootpage': 10, + 'sql': None, + }), + OrderedDict({ + 'type': 'index', + 'name': 'sqlite_autoindex_v_metadatatext00_1', + 'tbl_name': 'v_metadatatext00', + 'rootpage': 12, + 'sql': None, + }), + OrderedDict({ + 'type': 'index', + 'name': 'sqlite_autoindex_v_vector_chunks00_1', + 'tbl_name': 'v_vector_chunks00', + 'rootpage': 8, + 'sql': None, + }), + OrderedDict({ + 'type': 'table', + 'name': 'sqlite_sequence', + 'tbl_name': 'sqlite_sequence', + 'rootpage': 5, + 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v', + 'tbl_name': 'v', + 'rootpage': 0, + 'sql': 'CREATE VIRTUAL TABLE v using vec0(a 
float[1], partition text partition key, metadata text, +name text, chunk_size=8)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_auxiliary', + 'tbl_name': 'v_auxiliary', + 'rootpage': 13, + 'sql': 'CREATE TABLE "v_auxiliary"( rowid integer PRIMARY KEY , value00)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_chunks', + 'tbl_name': 'v_chunks', + 'rootpage': 4, + 'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,sequence_id integer,partition00,validity BLOB NOT NULL, rowids BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_info', + 'tbl_name': 'v_info', + 'rootpage': 2, + 'sql': 'CREATE TABLE "v_info" (key text primary key, value any)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatachunks00', + 'tbl_name': 'v_metadatachunks00', + 'rootpage': 9, + 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatatext00', + 'tbl_name': 'v_metadatatext00', + 'rootpage': 11, + 'sql': 'CREATE TABLE "v_metadatatext00"(rowid PRIMARY KEY, data TEXT)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_rowids', + 'tbl_name': 'v_rowids', + 'rootpage': 6, + 'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_vector_chunks00', + 'tbl_name': 'v_vector_chunks00', + 'rootpage': 7, + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + }), + ]), + }) +# --- +# name: test_shadow.1 + OrderedDict({ + 'sql': "select * from pragma_table_list where type = 'shadow'", + 'rows': list([ + OrderedDict({ + 'schema': 'main', + 'name': 'v_auxiliary', + 'type': 'shadow', + 'ncol': 2, + 'wr': 0, + 'strict': 0, + }), + OrderedDict({ + 'schema': 'main', + 'name': 'v_chunks', + 'type': 'shadow', + 'ncol': 6, + 'wr': 0, + 'strict': 0, + }), + OrderedDict({ + 'schema': 
'main', + 'name': 'v_info', + 'type': 'shadow', + 'ncol': 2, + 'wr': 0, + 'strict': 0, + }), + OrderedDict({ + 'schema': 'main', + 'name': 'v_rowids', + 'type': 'shadow', + 'ncol': 4, + 'wr': 0, + 'strict': 0, + }), + OrderedDict({ + 'schema': 'main', + 'name': 'v_metadatachunks00', + 'type': 'shadow', + 'ncol': 2, + 'wr': 0, + 'strict': 0, + }), + OrderedDict({ + 'schema': 'main', + 'name': 'v_metadatatext00', + 'type': 'shadow', + 'ncol': 2, + 'wr': 0, + 'strict': 0, + }), + ]), + }) +# --- +# name: test_shadow.2 + OrderedDict({ + 'sql': "select * from pragma_table_list where type = 'shadow'", + 'rows': list([ + ]), + }) +# --- diff --git a/tests/__snapshots__/test-metadata.ambr b/tests/__snapshots__/test-metadata.ambr new file mode 100644 index 0000000..12212ff --- /dev/null +++ b/tests/__snapshots__/test-metadata.ambr @@ -0,0 +1,4097 @@ +# serializer version: 1 +# name: test_constructor_limit[max 16 metadata columns] + dict({ + 'error': 'OperationalError', + 'message': 'vec0 constructor error: More than 16 metadata columns were provided', + }) +# --- +# name: test_deletes + OrderedDict({ + 'sql': 'insert into v(rowid, vector, b, n, f, t) values (?, ?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_deletes.1 + OrderedDict({ + 'sql': 'insert into v(rowid, vector, b, n, f, t) values (?, ?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_deletes.10 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x02', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x02', 
+ }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x9a\x99\x99\x99\x99\x99\x01@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': 
b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_deletes.2 + OrderedDict({ + 'sql': 'insert into v(rowid, vector, b, n, f, t) values (?, ?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_deletes.3 + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 1, + 'n': 1, + 'f': 1.1, + 't': 'test1', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123', + }), + ]), + }) +# --- +# name: test_deletes.4 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x07', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': 
b'\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x05\x00\x00\x00test1\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_deletes.5 + OrderedDict({ + 'sql': 'DELETE FROM v where rowid = 1', + 'rows': list([ + ]), + }) +# --- +# name: test_deletes.6 + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2', + }), + OrderedDict({ + 'rowid': 
3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123', + }), + ]), + }) +# --- +# name: test_deletes.7 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x06', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x06', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_deletes.8 + OrderedDict({ + 'sql': 'DELETE FROM v where rowid = 3', + 'rows': list([ + ]), + }) +# --- +# name: test_deletes.9 + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2', + }), + ]), + }) +# --- +# name: test_errors + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x00\x00\x80?', + 't': 'aaaaaaaaaaaax', + }), + ]), + }) +# --- +# name: test_errors.1 + dict({ + 'error': 'OperationalError', + 'message': 'Could not extract metadata value for column t at rowid 1', + }) +# --- +# name: test_idxstr + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and is_favorited = 
true", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Aa_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-float !=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and mean_rating != NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Df_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-float <=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and mean_rating <= NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Dc_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-float <] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and mean_rating < NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Dd_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-float >=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and mean_rating >= NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&De_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-float >] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and mean_rating > NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Db_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-int !=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and num_reviews != NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Cf_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# 
--- +# name: test_idxstr[knn-constraint-int <=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and num_reviews <= NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Cc_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-int <] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and num_reviews < NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Cd_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-int >=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and num_reviews >= NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Ce_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-int >] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and num_reviews > NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Cb_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-text !=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and genre != NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Bf_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-text <=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and genre <= NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Bc_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-text <] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and genre < NULL", + 
'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Bd_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-text >=] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and genre >= NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Be_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_idxstr[knn-constraint-text >] + OrderedDict({ + 'sql': "select * from vec_movies where synopsis_embedding match '' and k = 0 and genre > NULL", + 'plan': list([ + dict({ + 'detail': 'SCAN vec_movies VIRTUAL TABLE INDEX 0:3{___}___&Bb_', + 'id': 2, + 'parent': 0, + }), + ]), + }) +# --- +# name: test_knn.1 + dict({ + 'error': 'OperationalError', + 'message': 'An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed.', + }) +# --- +# name: test_knn[sqlite_master] + OrderedDict({ + 'sql': "select * from sqlite_master where type = 'table' order by name", + 'rows': list([ + OrderedDict({ + 'type': 'table', + 'name': 'sqlite_sequence', + 'tbl_name': 'sqlite_sequence', + 'rootpage': 5, + 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v', + 'tbl_name': 'v', + 'rootpage': 0, + 'sql': 'CREATE VIRTUAL TABLE v using vec0(vector float[1], name text, chunk_size=8)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_chunks', + 'tbl_name': 'v_chunks', + 'rootpage': 4, + 'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_info', + 'tbl_name': 'v_info', + 'rootpage': 2, + 'sql': 'CREATE TABLE "v_info" (key text primary key, value any)', + }), + OrderedDict({ + 'type': 'table', + 'name': 
'v_metadatachunks00', + 'tbl_name': 'v_metadatachunks00', + 'rootpage': 9, + 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatatext00', + 'tbl_name': 'v_metadatatext00', + 'rootpage': 11, + 'sql': 'CREATE TABLE "v_metadatatext00"(rowid PRIMARY KEY, data TEXT)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_rowids', + 'tbl_name': 'v_rowids', + 'rootpage': 6, + 'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_vector_chunks00', + 'tbl_name': 'v_vector_chunks00', + 'rootpage': 7, + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + }), + ]), + }) +# --- +# name: test_long_text_knn[eq-bb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name = ?", + 'rows': list([ + ]), + }) +# --- +# name: test_long_text_knn[eq-bbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name = ?", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[eq-bbbbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name = ?", + 'rows': list([ + ]), + }) +# --- +# name: test_long_text_knn[eq-bbbbbbbbbbbb_aaa] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name = ?", + 'rows': list([ + ]), + }) +# --- +# name: test_long_text_knn[eq-bbbbbbbbbbbb_bbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name = ?", + 'rows': list([ + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[eq-bbbbbbbbbbbb_ccc] + OrderedDict({ + 
'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name = ?", + 'rows': list([ + ]), + }) +# --- +# name: test_long_text_knn[eq-longlonglonglonglonglonglong] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name = ?", + 'rows': list([ + ]), + }) +# --- +# name: test_long_text_knn[ge-bb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name >= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ge-bbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name >= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ge-bbbbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name >= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ge-bbbbbbbbbbbb_aaa] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name >= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, 
+ 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ge-bbbbbbbbbbbb_bbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name >= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ge-bbbbbbbbbbbb_ccc] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name >= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ge-longlonglonglonglonglonglong] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name >= ?", + 'rows': list([ + ]), + }) +# --- +# name: test_long_text_knn[gt-bb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[gt-bbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 
94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[gt-bbbbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[gt-bbbbbbbbbbbb_aaa] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[gt-bbbbbbbbbbbb_bbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[gt-bbbbbbbbbbbb_ccc] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[gt-longlonglonglonglonglonglong] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + ]), + }) +# --- +# 
name: test_long_text_knn[le-bb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name <= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[le-bbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name <= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[le-bbbbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name <= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[le-bbbbbbbbbbbb_aaa] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name <= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[le-bbbbbbbbbbbb_bbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name <= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 
'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[le-bbbbbbbbbbbb_ccc] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name <= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[le-longlonglonglonglonglonglong] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name <= ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[lt-bb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name < ?", + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[lt-bbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name < ?", + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: 
test_long_text_knn[lt-bbbbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name < ?", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[lt-bbbbbbbbbbbb_aaa] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name < ?", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[lt-bbbbbbbbbbbb_bbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name < ?", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[lt-bbbbbbbbbbbb_ccc] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name < ?", + 'rows': list([ + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[lt-longlonglonglonglonglonglong] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name < ?", + 'rows': list([ + 
OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ne-bb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name != ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ne-bbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name != ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ne-bbbbbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name != ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 
'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ne-bbbbbbbbbbbb_aaa] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name != ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ne-bbbbbbbbbbbb_bbb] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name != ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaaa', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_long_text_knn[ne-bbbbbbbbbbbb_ccc] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name != ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + ]), + }) 
+# --- +# name: test_long_text_knn[ne-longlonglonglonglonglonglong] + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name != ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'bbbb', + 'distance': 97.0, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'aaaaaaaaaaaa_aaa', + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_long_text_updates + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + ]), + }), + 'v_metadatatext00': OrderedDict({ + 'sql': 'select * from v_metadatatext00', + 'rows': list([ + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + ]), + }), + }) +# --- +# name: test_long_text_updates.1 + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'name': '123456789a12', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'\x11\x11\x11\x11', + 'name': '123456789a123', + }), + ]), + }) +# --- +# name: test_long_text_updates.2 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x03', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 
'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x0c\x00\x00\x00123456789a12\r\x00\x00\x00123456789a12\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext00': OrderedDict({ + 'sql': 'select * from v_metadatatext00', + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'data': '123456789a123', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_normal.1 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 
'rows': list([ + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + ]), + }), + }) +# --- +# name: test_normal.2 + OrderedDict({ + 'sql': 'insert into v(vector, b, n, f, t) values (?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_normal.3 + OrderedDict({ + 'sql': 'insert into v(vector, b, n, f, t) values (?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_normal.4 + OrderedDict({ + 'sql': 'insert into v(vector, b, n, f, t) values (?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_normal.5 + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 1, + 'n': 1, + 'f': 1.1, + 't': 'one', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'two', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': 'three', + }), + ]), + }) +# --- +# name: test_normal.6 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x07', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': 
b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x03\x00\x00\x00one\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00two\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00three\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': 
b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_normal.7 + OrderedDict({ + 'sql': 'drop table v', + 'rows': list([ + ]), + }) +# --- +# name: test_normal.8 + OrderedDict({ + 'sql': 'select * from sqlite_master', + 'rows': list([ + OrderedDict({ + 'type': 'table', + 'name': 'sqlite_sequence', + 'tbl_name': 'sqlite_sequence', + 'rootpage': 5, + 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', + }), + ]), + }) +# --- +# name: test_normal[sqlite_master] + OrderedDict({ + 'sql': "select * from sqlite_master where type = 'table' order by name", + 'rows': list([ + OrderedDict({ + 'type': 'table', + 'name': 'sqlite_sequence', + 'tbl_name': 'sqlite_sequence', + 'rootpage': 5, + 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v', + 'tbl_name': 'v', + 'rootpage': 0, + 'sql': 'CREATE VIRTUAL TABLE v using vec0(vector float[1], b boolean, n int, f float, t text, chunk_size=8)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_chunks', + 'tbl_name': 'v_chunks', + 'rootpage': 4, + 'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_info', + 'tbl_name': 'v_info', + 'rootpage': 2, + 'sql': 'CREATE TABLE "v_info" (key text primary key, value any)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatachunks00', + 'tbl_name': 'v_metadatachunks00', + 'rootpage': 9, + 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatachunks01', + 'tbl_name': 'v_metadatachunks01', + 'rootpage': 11, + 'sql': 'CREATE TABLE "v_metadatachunks01"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatachunks02', + 'tbl_name': 'v_metadatachunks02', + 
'rootpage': 13, + 'sql': 'CREATE TABLE "v_metadatachunks02"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatachunks03', + 'tbl_name': 'v_metadatachunks03', + 'rootpage': 15, + 'sql': 'CREATE TABLE "v_metadatachunks03"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadatatext03', + 'tbl_name': 'v_metadatatext03', + 'rootpage': 17, + 'sql': 'CREATE TABLE "v_metadatatext03"(rowid PRIMARY KEY, data TEXT)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_rowids', + 'tbl_name': 'v_rowids', + 'rootpage': 6, + 'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_vector_chunks00', + 'tbl_name': 'v_vector_chunks00', + 'rootpage': 7, + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + }), + ]), + }) +# --- +# name: test_stress + dict({ + 'vec_movies_auxiliary': OrderedDict({ + 'sql': 'select * from vec_movies_auxiliary', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'value00': 'The Conjuring', + }), + OrderedDict({ + 'rowid': 2, + 'value00': 'Dumb and Dumber', + }), + OrderedDict({ + 'rowid': 3, + 'value00': 'Interstellar', + }), + OrderedDict({ + 'rowid': 4, + 'value00': 'The Lord of the Rings: The Fellowship of the Ring', + }), + OrderedDict({ + 'rowid': 5, + 'value00': 'An Inconvenient Truth', + }), + OrderedDict({ + 'rowid': 6, + 'value00': 'Hereditary', + }), + OrderedDict({ + 'rowid': 7, + 'value00': 'Anchorman: The Legend of Ron Burgundy', + }), + OrderedDict({ + 'rowid': 8, + 'value00': 'Blade Runner 2049', + }), + OrderedDict({ + 'rowid': 9, + 'value00': "Harry Potter and the Sorcerer's Stone", + }), + OrderedDict({ + 'rowid': 10, + 'value00': 'Free Solo', + }), + OrderedDict({ + 'rowid': 11, + 'value00': 'Get Out', + }), + OrderedDict({ + 'rowid': 12, + 'value00': 'The Hangover', + }), + OrderedDict({ + 
'rowid': 13, + 'value00': 'The Matrix', + }), + OrderedDict({ + 'rowid': 14, + 'value00': "Pan's Labyrinth", + }), + OrderedDict({ + 'rowid': 15, + 'value00': '13th', + }), + OrderedDict({ + 'rowid': 16, + 'value00': 'It Follows', + }), + OrderedDict({ + 'rowid': 17, + 'value00': 'Step Brothers', + }), + OrderedDict({ + 'rowid': 18, + 'value00': 'Inception', + }), + OrderedDict({ + 'rowid': 19, + 'value00': 'The Shape of Water', + }), + OrderedDict({ + 'rowid': 20, + 'value00': "Won't You Be My Neighbor?", + }), + OrderedDict({ + 'rowid': 21, + 'value00': 'Gravity', + }), + OrderedDict({ + 'rowid': 22, + 'value00': 'Dune', + }), + OrderedDict({ + 'rowid': 23, + 'value00': 'The Martian', + }), + OrderedDict({ + 'rowid': 24, + 'value00': 'A Quiet Place', + }), + OrderedDict({ + 'rowid': 25, + 'value00': 'The Chronicles of Narnia: The Lion, the Witch and the Wardrobe', + }), + ]), + }), + 'vec_movies_chunks': OrderedDict({ + 'sql': 'select * from vec_movies_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\xff', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'chunk_id': 2, + 'size': 8, + 'validity': b'\xff', + 'rowids': b'\t\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'chunk_id': 3, + 'size': 8, + 'validity': b'\xff', + 'rowids': 
b'\x11\x00\x00\x00\x00\x00\x00\x00\x12\x00\x00\x00\x00\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x16\x00\x00\x00\x00\x00\x00\x00\x17\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'chunk_id': 4, + 'size': 8, + 'validity': b'\x01', + 'rowids': b'\x19\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'vec_movies_metadatachunks00': OrderedDict({ + 'sql': 'select * from vec_movies_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'p', + }), + OrderedDict({ + 'rowid': 2, + 'data': b'U', + }), + OrderedDict({ + 'rowid': 3, + 'data': b'\xff', + }), + OrderedDict({ + 'rowid': 4, + 'data': b'\x01', + }), + ]), + }), + 'vec_movies_metadatachunks01': OrderedDict({ + 'sql': 'select * from vec_movies_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x06\x00\x00\x00horror\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00comedy\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00scifi\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00fantasy\x00\x00\x00\x00\x00\x0b\x00\x00\x00documentary\x00\x06\x00\x00\x00horror\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00comedy\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00scifi\x00\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'rowid': 2, + 'data': b'\x07\x00\x00\x00fantasy\x00\x00\x00\x00\x00\x0b\x00\x00\x00documentary\x00\x06\x00\x00\x00horror\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00comedy\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00scifi\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00fantasy\x00\x00\x00\x00\x00\x0b\x00\x00\x00documentary\x00\x06\x00\x00\x00horror\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'rowid': 3, + 'data': 
b'\x06\x00\x00\x00comedy\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00scifi\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00fantasy\x00\x00\x00\x00\x00\x0b\x00\x00\x00documentary\x00\x05\x00\x00\x00scifi\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00scifi\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00scifi\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00horror\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'rowid': 4, + 'data': b'\x07\x00\x00\x00fantasy\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'vec_movies_metadatachunks02': OrderedDict({ + 'sql': 'select * from vec_movies_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x99\x00\x00\x00\x00\x00\x00\x00~\x01\x00\x00\x00\x00\x00\x005\x00\x00\x00\x00\x00\x00\x00\xd2\x00\x00\x00\x00\x00\x00\x00]\x00\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\x00\x00\xe2\x01\x00\x00\x00\x00\x00\x00-\x01\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'rowid': 2, + 'data': b'\x86\x00\x00\x00\x00\x00\x00\x00B\x00\x00\x00\x00\x00\x00\x00X\x00\x00\x00\x00\x00\x00\x00;\x00\x00\x00\x00\x00\x00\x00\xa7\x01\x00\x00\x00\x00\x00\x00\x13\x01\x00\x00\x00\x00\x00\x00\xbf\x00\x00\x00\x00\x00\x00\x00:\x01\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'rowid': 3, + 'data': b'J\x00\x00\x00\x00\x00\x00\x00\xc9\x00\x00\x00\x00\x00\x00\x00\x8f\x01\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00V\x01\x00\x00\x00\x00\x00\x00\xc3\x01\x00\x00\x00\x00\x00\x00\n\x02\x00\x00\x00\x00\x00\x00\x0f\x01\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'rowid': 4, + 'data': 
b'6\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'vec_movies_metadatachunks03': OrderedDict({ + 'sql': 'select * from vec_movies_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'ffffff\x12@\xcd\xcc\xcc\xcc\xcc\xcc\x04@\x00\x00\x00\x00\x00\x00\x14@\xcd\xcc\xcc\xcc\xcc\xcc\x10@333333\x0b@\xcd\xcc\xcc\xcc\xcc\xcc\x12@333333\x07@\x00\x00\x00\x00\x00\x00\x14@', + }), + OrderedDict({ + 'rowid': 2, + 'data': b'ffffff\x10@\x9a\x99\x99\x99\x99\x99\t@\x9a\x99\x99\x99\x99\x99\x13@ffffff\x06@\x00\x00\x00\x00\x00\x00\x12@\xcd\xcc\xcc\xcc\xcc\xcc\x0c@\x9a\x99\x99\x99\x99\x99\x11@333333\x11@', + }), + OrderedDict({ + 'rowid': 3, + 'data': b'\x00\x00\x00\x00\x00\x00\x08@\x00\x00\x00\x00\x00\x00\x14@\x9a\x99\x99\x99\x99\x99\x05@333333\x13@\x00\x00\x00\x00\x00\x00\x10@\x9a\x99\x99\x99\x99\x99\x11@ffffff\x12@333333\x11@', + }), + OrderedDict({ + 'rowid': 4, + 'data': b'333333\x0f@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'vec_movies_metadatatext01': OrderedDict({ + 'sql': 'select * from vec_movies_metadatatext01', + 'rows': list([ + ]), + }), + 'vec_movies_rowids': OrderedDict({ + 'sql': 'select * from vec_movies_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + OrderedDict({ + 'rowid': 4, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 3, + }), + OrderedDict({ + 'rowid': 5, + 'id': None, + 
'chunk_id': 1, + 'chunk_offset': 4, + }), + OrderedDict({ + 'rowid': 6, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 5, + }), + OrderedDict({ + 'rowid': 7, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 6, + }), + OrderedDict({ + 'rowid': 8, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 7, + }), + OrderedDict({ + 'rowid': 9, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 10, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 11, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 2, + }), + OrderedDict({ + 'rowid': 12, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 3, + }), + OrderedDict({ + 'rowid': 13, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 4, + }), + OrderedDict({ + 'rowid': 14, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 5, + }), + OrderedDict({ + 'rowid': 15, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 6, + }), + OrderedDict({ + 'rowid': 16, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 7, + }), + OrderedDict({ + 'rowid': 17, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 18, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 19, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 2, + }), + OrderedDict({ + 'rowid': 20, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 3, + }), + OrderedDict({ + 'rowid': 21, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 4, + }), + OrderedDict({ + 'rowid': 22, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 5, + }), + OrderedDict({ + 'rowid': 23, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 6, + }), + OrderedDict({ + 'rowid': 24, + 'id': None, + 'chunk_id': 3, + 'chunk_offset': 7, + }), + OrderedDict({ + 'rowid': 25, + 'id': None, + 'chunk_id': 4, + 'chunk_offset': 0, + }), + ]), + }), + 'vec_movies_vector_chunks00': OrderedDict({ + 'sql': 'select * from vec_movies_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': 
b'\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x80@\x00\x00\xa0@\x00\x00\xc0@\x00\x00\xe0@\x00\x00\x00A', + }), + OrderedDict({ + 'rowid': 2, + 'vectors': b'\x00\x00\x10A\x00\x00 A\x00\x000A\x00\x00@A\x00\x00PA\x00\x00`A\x00\x00pA\x00\x00\x80A', + }), + OrderedDict({ + 'rowid': 3, + 'vectors': b'\x00\x00\x88A\x00\x00\x90A\x00\x00\x98A\x00\x00\xa0A\x00\x00\xa8A\x00\x00\xb0A\x00\x00\xb8A\x00\x00\xc0A', + }), + OrderedDict({ + 'rowid': 4, + 'vectors': b'\x00\x00\xc8A\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_stress.1 + OrderedDict({ + 'sql': ''' + + select + movie_id, + title, + genre, + num_reviews, + mean_rating, + is_favorited, + distance + from vec_movies + where synopsis_embedding match '[15.5]' + and genre = 'scifi' + and num_reviews between 100 and 500 + and mean_rating > 3.5 + and k = 5; + + ''', + 'rows': list([ + OrderedDict({ + 'movie_id': 13, + 'title': 'The Matrix', + 'genre': 'scifi', + 'num_reviews': 423, + 'mean_rating': 4.5, + 'is_favorited': 1, + 'distance': 2.5, + }), + OrderedDict({ + 'movie_id': 18, + 'title': 'Inception', + 'genre': 'scifi', + 'num_reviews': 201, + 'mean_rating': 5.0, + 'is_favorited': 1, + 'distance': 2.5, + }), + OrderedDict({ + 'movie_id': 21, + 'title': 'Gravity', + 'genre': 'scifi', + 'num_reviews': 342, + 'mean_rating': 4.0, + 'is_favorited': 1, + 'distance': 5.5, + }), + OrderedDict({ + 'movie_id': 22, + 'title': 'Dune', + 'genre': 'scifi', + 'num_reviews': 451, + 'mean_rating': 4.4, + 'is_favorited': 1, + 'distance': 6.5, + }), + OrderedDict({ + 'movie_id': 8, + 'title': 'Blade Runner 2049', + 'genre': 'scifi', + 'num_reviews': 301, + 'mean_rating': 5.0, + 'is_favorited': 0, + 'distance': 7.5, + }), + ]), + }) +# --- +# name: test_stress.2 + OrderedDict({ + 'sql': "select movie_id, genre, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and genre = 'horror'", + 'rows': list([ + 
OrderedDict({ + 'movie_id': 24, + 'genre': 'horror', + 'distance': 76.0, + }), + OrderedDict({ + 'movie_id': 16, + 'genre': 'horror', + 'distance': 84.0, + }), + OrderedDict({ + 'movie_id': 11, + 'genre': 'horror', + 'distance': 89.0, + }), + OrderedDict({ + 'movie_id': 6, + 'genre': 'horror', + 'distance': 94.0, + }), + OrderedDict({ + 'movie_id': 1, + 'genre': 'horror', + 'distance': 99.0, + }), + ]), + }) +# --- +# name: test_stress.3 + OrderedDict({ + 'sql': "select movie_id, genre, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and genre = 'comedy'", + 'rows': list([ + OrderedDict({ + 'movie_id': 17, + 'genre': 'comedy', + 'distance': 83.0, + }), + OrderedDict({ + 'movie_id': 12, + 'genre': 'comedy', + 'distance': 88.0, + }), + OrderedDict({ + 'movie_id': 7, + 'genre': 'comedy', + 'distance': 93.0, + }), + OrderedDict({ + 'movie_id': 2, + 'genre': 'comedy', + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_stress.4 + OrderedDict({ + 'sql': "select movie_id, num_reviews, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and num_reviews between 100 and 500", + 'rows': list([ + OrderedDict({ + 'movie_id': 25, + 'num_reviews': 310, + 'distance': 75.0, + }), + OrderedDict({ + 'movie_id': 24, + 'num_reviews': 271, + 'distance': 76.0, + }), + OrderedDict({ + 'movie_id': 22, + 'num_reviews': 451, + 'distance': 78.0, + }), + OrderedDict({ + 'movie_id': 21, + 'num_reviews': 342, + 'distance': 79.0, + }), + OrderedDict({ + 'movie_id': 20, + 'num_reviews': 186, + 'distance': 80.0, + }), + ]), + }) +# --- +# name: test_stress.5 + OrderedDict({ + 'sql': "select movie_id, num_reviews, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and num_reviews >= 500", + 'rows': list([ + OrderedDict({ + 'movie_id': 23, + 'num_reviews': 522, + 'distance': 77.0, + }), + ]), + }) +# --- +# name: test_stress.6 + OrderedDict({ + 'sql': "select movie_id, mean_rating, distance from vec_movies where 
synopsis_embedding match '[100]' and k = 5 and mean_rating < 3.0", + 'rows': list([ + OrderedDict({ + 'movie_id': 19, + 'mean_rating': 2.7, + 'distance': 81.0, + }), + OrderedDict({ + 'movie_id': 12, + 'mean_rating': 2.8, + 'distance': 88.0, + }), + OrderedDict({ + 'movie_id': 7, + 'mean_rating': 2.9, + 'distance': 93.0, + }), + OrderedDict({ + 'movie_id': 2, + 'mean_rating': 2.6, + 'distance': 98.0, + }), + ]), + }) +# --- +# name: test_stress.7 + OrderedDict({ + 'sql': "select movie_id, mean_rating, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and mean_rating between 4.0 and 5.0", + 'rows': list([ + OrderedDict({ + 'movie_id': 24, + 'mean_rating': 4.3, + 'distance': 76.0, + }), + OrderedDict({ + 'movie_id': 23, + 'mean_rating': 4.6, + 'distance': 77.0, + }), + OrderedDict({ + 'movie_id': 22, + 'mean_rating': 4.4, + 'distance': 78.0, + }), + OrderedDict({ + 'movie_id': 21, + 'mean_rating': 4.0, + 'distance': 79.0, + }), + OrderedDict({ + 'movie_id': 20, + 'mean_rating': 4.8, + 'distance': 80.0, + }), + ]), + }) +# --- +# name: test_stress[bool-eq-false] + OrderedDict({ + 'sql': "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited = FALSE", + 'rows': list([ + OrderedDict({ + 'movie_id': 16, + 'is_favorited': 0, + 'distance': 84.0, + }), + OrderedDict({ + 'movie_id': 14, + 'is_favorited': 0, + 'distance': 86.0, + }), + OrderedDict({ + 'movie_id': 12, + 'is_favorited': 0, + 'distance': 88.0, + }), + OrderedDict({ + 'movie_id': 10, + 'is_favorited': 0, + 'distance': 90.0, + }), + OrderedDict({ + 'movie_id': 8, + 'is_favorited': 0, + 'distance': 92.0, + }), + ]), + }) +# --- +# name: test_stress[bool-eq-true] + OrderedDict({ + 'sql': "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited = TRUE", + 'rows': list([ + OrderedDict({ + 'movie_id': 25, + 'is_favorited': 1, + 'distance': 75.0, + }), + 
OrderedDict({ + 'movie_id': 24, + 'is_favorited': 1, + 'distance': 76.0, + }), + OrderedDict({ + 'movie_id': 23, + 'is_favorited': 1, + 'distance': 77.0, + }), + OrderedDict({ + 'movie_id': 22, + 'is_favorited': 1, + 'distance': 78.0, + }), + OrderedDict({ + 'movie_id': 21, + 'is_favorited': 1, + 'distance': 79.0, + }), + ]), + }) +# --- +# name: test_stress[bool-ne-false] + OrderedDict({ + 'sql': "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited != FALSE", + 'rows': list([ + OrderedDict({ + 'movie_id': 25, + 'is_favorited': 1, + 'distance': 75.0, + }), + OrderedDict({ + 'movie_id': 24, + 'is_favorited': 1, + 'distance': 76.0, + }), + OrderedDict({ + 'movie_id': 23, + 'is_favorited': 1, + 'distance': 77.0, + }), + OrderedDict({ + 'movie_id': 22, + 'is_favorited': 1, + 'distance': 78.0, + }), + OrderedDict({ + 'movie_id': 21, + 'is_favorited': 1, + 'distance': 79.0, + }), + ]), + }) +# --- +# name: test_stress[bool-ne-true] + OrderedDict({ + 'sql': "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited != TRUE", + 'rows': list([ + OrderedDict({ + 'movie_id': 16, + 'is_favorited': 0, + 'distance': 84.0, + }), + OrderedDict({ + 'movie_id': 14, + 'is_favorited': 0, + 'distance': 86.0, + }), + OrderedDict({ + 'movie_id': 12, + 'is_favorited': 0, + 'distance': 88.0, + }), + OrderedDict({ + 'movie_id': 10, + 'is_favorited': 0, + 'distance': 90.0, + }), + OrderedDict({ + 'movie_id': 8, + 'is_favorited': 0, + 'distance': 92.0, + }), + ]), + }) +# --- +# name: test_stress[bool-other-op] + dict({ + 'error': 'OperationalError', + 'message': 'ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns.', + }) +# --- +# name: test_text_knn + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from 
v_metadatachunks00', + 'rows': list([ + ]), + }), + 'v_metadatatext00': OrderedDict({ + 'sql': 'select * from v_metadatatext00', + 'rows': list([ + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + ]), + }), + }) +# --- +# name: test_text_knn.1 + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\xaeG\xe1=', + 'name': 'aaa', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'\xaeGa>', + 'name': 'bbb', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'\xc3\xf5\xa8>', + 'name': 'ccc', + }), + OrderedDict({ + 'rowid': 4, + 'vector': b'\xaeG\xe1>', + 'name': 'ddd', + }), + OrderedDict({ + 'rowid': 5, + 'vector': b'\xcd\xcc\x0c?', + 'name': 'eee', + }), + OrderedDict({ + 'rowid': 6, + 'vector': b'\xc3\xf5(?', + 'name': 'fff', + }), + OrderedDict({ + 'rowid': 7, + 'vector': b'\xb8\x1eE?', + 'name': 'ggg', + }), + OrderedDict({ + 'rowid': 8, + 'vector': b'\xaeGa?', + 'name': 'hhh', + }), + OrderedDict({ + 'rowid': 9, + 'vector': b'\xa4p}?', + 'name': 'iii', + }), + ]), + }) +# --- +# name: test_text_knn.2 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\xff', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'chunk_id': 2, + 'size': 8, + 'validity': b'\x01', + 'rowids': b'\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + 
}), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x03\x00\x00\x00aaa\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00bbb\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00ccc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00ddd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00eee\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00fff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00ggg\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00hhh\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + OrderedDict({ + 'rowid': 2, + 'data': b'\x03\x00\x00\x00iii\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext00': OrderedDict({ + 'sql': 'select * from v_metadatatext00', + 'rows': list([ + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + OrderedDict({ + 'rowid': 4, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 3, + }), + OrderedDict({ + 'rowid': 5, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 4, + }), + OrderedDict({ + 'rowid': 6, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 5, + }), + OrderedDict({ + 'rowid': 7, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 6, + }), + OrderedDict({ + 
'rowid': 8, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 7, + }), + OrderedDict({ + 'rowid': 9, + 'id': None, + 'chunk_id': 2, + 'chunk_offset': 0, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\xaeG\xe1=\xaeGa>\xc3\xf5\xa8>\xaeG\xe1>\xcd\xcc\x0c?\xc3\xf5(?\xb8\x1eE?\xaeGa?', + }), + OrderedDict({ + 'rowid': 2, + 'vectors': b'\xa4p}?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_text_knn.3 + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[1]' and k = 5", + 'rows': list([ + OrderedDict({ + 'rowid': 9, + 'name': 'iii', + 'distance': 0.009999990463256836, + }), + OrderedDict({ + 'rowid': 8, + 'name': 'hhh', + 'distance': 0.12000000476837158, + }), + OrderedDict({ + 'rowid': 7, + 'name': 'ggg', + 'distance': 0.23000001907348633, + }), + OrderedDict({ + 'rowid': 6, + 'name': 'fff', + 'distance': 0.3399999737739563, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'eee', + 'distance': 0.44999998807907104, + }), + ]), + }) +# --- +# name: test_text_knn.4 + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[1]' and k = 5 and name < 'ddd'", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'name': 'ccc', + 'distance': 0.6699999570846558, + }), + OrderedDict({ + 'rowid': 2, + 'name': 'bbb', + 'distance': 0.7799999713897705, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaa', + 'distance': 0.8899999856948853, + }), + ]), + }) +# --- +# name: test_text_knn.5 + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[1]' and k = 5 and name <= 'ddd'", + 'rows': list([ + OrderedDict({ + 'rowid': 4, + 'name': 'ddd', + 'distance': 0.5600000023841858, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'ccc', + 'distance': 0.6699999570846558, + }), + OrderedDict({ + 'rowid': 2, + 
'name': 'bbb', + 'distance': 0.7799999713897705, + }), + OrderedDict({ + 'rowid': 1, + 'name': 'aaa', + 'distance': 0.8899999856948853, + }), + ]), + }) +# --- +# name: test_text_knn.6 + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[1]' and k = 5 and name > 'fff'", + 'rows': list([ + OrderedDict({ + 'rowid': 9, + 'name': 'iii', + 'distance': 0.009999990463256836, + }), + OrderedDict({ + 'rowid': 8, + 'name': 'hhh', + 'distance': 0.12000000476837158, + }), + OrderedDict({ + 'rowid': 7, + 'name': 'ggg', + 'distance': 0.23000001907348633, + }), + ]), + }) +# --- +# name: test_text_knn.7 + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[1]' and k = 5 and name >= 'fff'", + 'rows': list([ + OrderedDict({ + 'rowid': 9, + 'name': 'iii', + 'distance': 0.009999990463256836, + }), + OrderedDict({ + 'rowid': 8, + 'name': 'hhh', + 'distance': 0.12000000476837158, + }), + OrderedDict({ + 'rowid': 7, + 'name': 'ggg', + 'distance': 0.23000001907348633, + }), + OrderedDict({ + 'rowid': 6, + 'name': 'fff', + 'distance': 0.3399999737739563, + }), + ]), + }) +# --- +# name: test_text_knn.8 + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[1]' and k = 5 and name = 'aaa'", + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'name': 'aaa', + 'distance': 0.8899999856948853, + }), + ]), + }) +# --- +# name: test_text_knn.9 + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[.01]' and k = 5 and name != 'aaa'", + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'name': 'bbb', + 'distance': 0.20999999344348907, + }), + OrderedDict({ + 'rowid': 3, + 'name': 'ccc', + 'distance': 0.320000022649765, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'ddd', + 'distance': 0.4300000071525574, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'eee', + 'distance': 0.5400000214576721, + }), + OrderedDict({ + 'rowid': 6, + 'name': 'fff', + 'distance': 0.6500000357627869, + }), + ]), + }) 
+# --- +# name: test_types[illegal-boolean] + dict({ + 'error': 'OperationalError', + 'message': 'Expected 0 or 1 for BOOLEAN metadata column b', + }) +# --- +# name: test_types[illegal-type-boolean] + dict({ + 'error': 'OperationalError', + 'message': 'Expected 0 or 1 for BOOLEAN metadata column b', + }) +# --- +# name: test_types[illegal-type-float] + dict({ + 'error': 'OperationalError', + 'message': 'Expected float for FLOAT metadata column f, received TEXT', + }) +# --- +# name: test_types[illegal-type-int] + dict({ + 'error': 'OperationalError', + 'message': 'Expected integer for INTEGER metadata column n, received TEXT', + }) +# --- +# name: test_types[illegal-type-text] + dict({ + 'error': 'OperationalError', + 'message': 'Expected text for TEXT metadata column t, received INTEGER', + }) +# --- +# name: test_types[legal] + OrderedDict({ + 'sql': 'insert into v(vector, b, n, f, t) values (?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_updates[1-init-contents] + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 1, + 'n': 1, + 'f': 1.1, + 't': 'test1', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123', + }), + ]), + }) +# --- +# name: test_updates[1-init-shadow] + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ 
+ OrderedDict({ + 'rowid': 1, + 'data': b'\x07', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x05\x00\x00\x00test1\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + 
]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_updates[general-update-contents] + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 0, + 'n': 11, + 'f': 11.11, + 't': 'newtest1', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123', + }), + ]), + }) +# --- +# name: test_updates[general-update-shaodnw] + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x06', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x0b\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': 
b'\xb8\x1e\x85\xebQ8&@\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x08\x00\x00\x00newtest1\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_updates[string-update-1-contents] + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 0, + 'n': 11, + 'f': 11.11, + 't': 'newtest1', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2', + }), + OrderedDict({ 
+ 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123-updated', + }), + ]), + }) +# --- +# name: test_updates[string-update-1-shadow] + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x06', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x0b\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\xb8\x1e\x85\xebQ8&@\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': 
b'\x08\x00\x00\x00newtest1\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123-updated', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_updates[string-update-2-contents] + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 0, + 'n': 11, + 'f': 11.11, + 't': 'newtest1', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2-short', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123-updated', + }), + ]), + }) +# --- +# name: test_updates[string-update-2-shadow] + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': 
b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x06', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x0b\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\xb8\x1e\x85\xebQ8&@\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x08\x00\x00\x00newtest1\x00\x00\x00\x00\x0b\x00\x00\x00test2-short\x00\x15\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123-updated', + }), + 
]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_updates[string-update-3-contents] + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 0, + 'n': 11, + 'f': 11.11, + 't': 'newtest1', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2-long-long-long', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123-updated', + }), + ]), + }) +# --- +# name: test_updates[string-update-3-shadow] + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x06', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': 
b'\x0b\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\xb8\x1e\x85\xebQ8&@\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x08\x00\x00\x00newtest1\x00\x00\x00\x00\x14\x00\x00\x00test2-long-l\x15\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123-updated', + }), + OrderedDict({ + 'rowid': 2, + 'data': 'test2-long-long-long', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': 
b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_updates[string-update-4-contents] + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'b': 0, + 'n': 11, + 'f': 11.11, + 't': 'newtest1', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'b': 1, + 'n': 2, + 'f': 2.2, + 't': 'test2-shortx', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'b': 1, + 'n': 3, + 'f': 3.3, + 't': '1234567890123-updated', + }), + ]), + }) +# --- +# name: test_updates[string-update-4-shadow] + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks00': OrderedDict({ + 'sql': 'select * from v_metadatachunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x06', + }), + ]), + }), + 'v_metadatachunks01': OrderedDict({ + 'sql': 'select * from v_metadatachunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x0b\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks02': OrderedDict({ + 'sql': 'select * from v_metadatachunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': 
b'\xb8\x1e\x85\xebQ8&@\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatachunks03': OrderedDict({ + 'sql': 'select * from v_metadatachunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x08\x00\x00\x00newtest1\x00\x00\x00\x00\x0c\x00\x00\x00test2-shortx\x15\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadatatext03': OrderedDict({ + 'sql': 'select * from v_metadatatext03', + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 'data': '1234567890123-updated', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_vtab_in[allow-int-all] + OrderedDict({ + 'sql': "select rowid, n, distance from v where vector match '[0]' and k = 8 and n in (555, 999)", + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'n': 999, + 'distance': 1.0, + }), + OrderedDict({ + 'rowid': 2, + 'n': 555, + 'distance': 2.0, + }), + OrderedDict({ + 'rowid': 3, + 'n': 999, + 'distance': 3.0, 
+ }), + OrderedDict({ + 'rowid': 4, + 'n': 555, + 'distance': 4.0, + }), + OrderedDict({ + 'rowid': 5, + 'n': 999, + 'distance': 5.0, + }), + OrderedDict({ + 'rowid': 6, + 'n': 555, + 'distance': 6.0, + }), + OrderedDict({ + 'rowid': 7, + 'n': 999, + 'distance': 7.0, + }), + OrderedDict({ + 'rowid': 8, + 'n': 555, + 'distance': 8.0, + }), + ]), + }) +# --- +# name: test_vtab_in[allow-int-superfluous] + OrderedDict({ + 'sql': "select rowid, n, distance from v where vector match '[0]' and k = 8 and n in (555, -1, -2)", + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 'n': 555, + 'distance': 2.0, + }), + OrderedDict({ + 'rowid': 4, + 'n': 555, + 'distance': 4.0, + }), + OrderedDict({ + 'rowid': 6, + 'n': 555, + 'distance': 6.0, + }), + OrderedDict({ + 'rowid': 8, + 'n': 555, + 'distance': 8.0, + }), + ]), + }) +# --- +# name: test_vtab_in[allow-text-all] + OrderedDict({ + 'sql': "select rowid, t, distance from v where vector match '[0]' and k = 8 and t in ('aaaa', 'zzzz')", + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 't': 'aaaa', + 'distance': 1.0, + }), + OrderedDict({ + 'rowid': 2, + 't': 'aaaa', + 'distance': 2.0, + }), + OrderedDict({ + 'rowid': 3, + 't': 'aaaa', + 'distance': 3.0, + }), + OrderedDict({ + 'rowid': 4, + 't': 'aaaa', + 'distance': 4.0, + }), + OrderedDict({ + 'rowid': 5, + 't': 'zzzz', + 'distance': 5.0, + }), + OrderedDict({ + 'rowid': 6, + 't': 'zzzz', + 'distance': 6.0, + }), + OrderedDict({ + 'rowid': 7, + 't': 'zzzz', + 'distance': 7.0, + }), + OrderedDict({ + 'rowid': 8, + 't': 'zzzz', + 'distance': 8.0, + }), + ]), + }) +# --- +# name: test_vtab_in[allow-text-superfluous] + OrderedDict({ + 'sql': "select rowid, t, distance from v where vector match '[0]' and k = 8 and t in ('aaaa', 'foo', 'bar')", + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 't': 'aaaa', + 'distance': 1.0, + }), + OrderedDict({ + 'rowid': 2, + 't': 'aaaa', + 'distance': 2.0, + }), + OrderedDict({ + 'rowid': 3, + 't': 'aaaa', + 'distance': 3.0, + }), + OrderedDict({ + 
'rowid': 4, + 't': 'aaaa', + 'distance': 4.0, + }), + ]), + }) +# --- +# name: test_vtab_in[block-bool] + dict({ + 'error': 'OperationalError', + 'message': "'xxx in (...)' is only available on INTEGER or TEXT metadata columns.", + }) +# --- +# name: test_vtab_in[block-float] + dict({ + 'error': 'OperationalError', + 'message': "'xxx in (...)' is only available on INTEGER or TEXT metadata columns.", + }) +# --- +# name: test_vtab_in_long_text[all] + OrderedDict({ + 'sql': "select rowid, t from v where vector match '[0]' and k = 10 and t in (select value from json_each(?))", + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 't': 'aaaa', + }), + OrderedDict({ + 'rowid': 2, + 't': 'aaaaaaaaaaaa_aaa', + }), + OrderedDict({ + 'rowid': 3, + 't': 'bbbb', + }), + OrderedDict({ + 'rowid': 4, + 't': 'bbbbbbbbbbbb_bbb', + }), + OrderedDict({ + 'rowid': 5, + 't': 'cccc', + }), + OrderedDict({ + 'rowid': 6, + 't': 'cccccccccccc_ccc', + }), + ]), + }) +# --- +# name: test_vtab_in_long_text[individual-aaaa] + OrderedDict({ + 'sql': "select rowid, t from v where vector match '[0]' and k = 10 and t in (?, 'nonsense')", + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 't': 'aaaa', + }), + ]), + }) +# --- +# name: test_vtab_in_long_text[individual-aaaaaaaaaaaa_aaa] + OrderedDict({ + 'sql': "select rowid, t from v where vector match '[0]' and k = 10 and t in (?, 'nonsense')", + 'rows': list([ + OrderedDict({ + 'rowid': 2, + 't': 'aaaaaaaaaaaa_aaa', + }), + ]), + }) +# --- +# name: test_vtab_in_long_text[individual-bbbb] + OrderedDict({ + 'sql': "select rowid, t from v where vector match '[0]' and k = 10 and t in (?, 'nonsense')", + 'rows': list([ + OrderedDict({ + 'rowid': 3, + 't': 'bbbb', + }), + ]), + }) +# --- +# name: test_vtab_in_long_text[individual-bbbbbbbbbbbb_bbb] + OrderedDict({ + 'sql': "select rowid, t from v where vector match '[0]' and k = 10 and t in (?, 'nonsense')", + 'rows': list([ + OrderedDict({ + 'rowid': 4, + 't': 'bbbbbbbbbbbb_bbb', + }), + ]), + }) +# --- +# 
name: test_vtab_in_long_text[individual-cccc] + OrderedDict({ + 'sql': "select rowid, t from v where vector match '[0]' and k = 10 and t in (?, 'nonsense')", + 'rows': list([ + OrderedDict({ + 'rowid': 5, + 't': 'cccc', + }), + ]), + }) +# --- +# name: test_vtab_in_long_text[individual-cccccccccccc_ccc] + OrderedDict({ + 'sql': "select rowid, t from v where vector match '[0]' and k = 10 and t in (?, 'nonsense')", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 't': 'cccccccccccc_ccc', + }), + ]), + }) +# --- diff --git a/tests/afbd/.gitignore b/tests/afbd/.gitignore new file mode 100644 index 0000000..aa1ec1e --- /dev/null +++ b/tests/afbd/.gitignore @@ -0,0 +1 @@ +*.tgz diff --git a/tests/afbd/.python-version b/tests/afbd/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/tests/afbd/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/tests/afbd/Makefile b/tests/afbd/Makefile new file mode 100644 index 0000000..083b429 --- /dev/null +++ b/tests/afbd/Makefile @@ -0,0 +1,9 @@ +random_ints_1m.tgz: + curl -o $@ https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_ints_1m.tgz + +random_float_1m.tgz: + curl -o $@ https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_float_1m.tgz + +random_keywords_1m.tgz: + curl -o $@ https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_keywords_1m.tgz +all: random_ints_1m.tgz random_float_1m.tgz random_keywords_1m.tgz diff --git a/tests/afbd/README.md b/tests/afbd/README.md new file mode 100644 index 0000000..be7b6e5 --- /dev/null +++ b/tests/afbd/README.md @@ -0,0 +1,12 @@ + +# hnm + +``` +tar -xOzf hnm.tgz ./tests.jsonl > tests.jsonl +solite q "select group_concat(distinct key) from lines_read('tests.jsonl'), json_each(line -> '$.conditions.and[0]')" +``` + + +``` +> python test-afbd.py build hnm.tgz --metadata 
product_group_name,colour_group_name,index_group_name,perceived_colour_value_name,section_name,product_type_name,department_name,graphical_appearance_name,garment_group_name,perceived_colour_master_name +``` diff --git a/tests/afbd/test-afbd.py b/tests/afbd/test-afbd.py new file mode 100644 index 0000000..4009442 --- /dev/null +++ b/tests/afbd/test-afbd.py @@ -0,0 +1,231 @@ +import numpy as np +from tqdm import tqdm +from deepdiff import DeepDiff + +import tarfile +import json +from io import BytesIO +import sqlite3 +from typing import List +from struct import pack +import time +from pathlib import Path +import argparse + + +def serialize_float32(vector: List[float]) -> bytes: + """Serializes a list of floats into the "raw bytes" format sqlite-vec expects""" + return pack("%sf" % len(vector), *vector) + + +def build_command(file_path, metadata_set=None): + if metadata_set: + metadata_set = set(metadata_set.split(",")) + + file_path = Path(file_path) + print(f"reading {file_path}...") + t0 = time.time() + with tarfile.open(file_path, "r:gz") as archive: + for file in archive: + if file.name == "./payloads.jsonl": + payloads = [ + json.loads(line) + for line in archive.extractfile(file.name).readlines() + ] + if file.name == "./tests.jsonl": + tests = [ + json.loads(line) + for line in archive.extractfile(file.name).readlines() + ] + if file.name == "./vectors.npy": + f = BytesIO() + f.write(archive.extractfile(file.name).read()) + f.seek(0) + vectors = np.load(f) + + assert payloads is not None + assert tests is not None + assert vectors is not None + dimensions = vectors.shape[1] + metadata_columns = sorted(list(payloads[0].keys())) + + def col_type(v): + if isinstance(v, int): + return "integer" + if isinstance(v, float): + return "float" + if isinstance(v, str): + return "text" + raise Exception(f"Unknown column type: {v}") + + metadata_columns_types = [col_type(payloads[0][col]) for col in metadata_columns] + + print(time.time() - t0) + t0 = time.time() + 
print("seeding...") + + db = sqlite3.connect(f"{file_path.stem}.db") + db.execute("PRAGMA page_size = 16384") + db.row_factory = sqlite3.Row + db.enable_load_extension(True) + db.load_extension("../../dist/vec0") + db.enable_load_extension(False) + + with db: + db.execute("create table tests(data)") + + for test in tests: + db.execute("insert into tests values (?)", [json.dumps(test)]) + + with db: + create_sql = f"create virtual table v using vec0(vector float[{dimensions}] distance_metric=cosine" + insert_sql = "insert into v(rowid, vector" + for name, type in zip(metadata_columns, metadata_columns_types): + if metadata_set: + if name in metadata_set: + create_sql += f", {name} {type}" + else: + create_sql += f", +{name} {type}" + else: + create_sql += f", {name} {type}" + + insert_sql += f", {name}" + create_sql += ")" + insert_sql += ") values (" + ",".join("?" * (2 + len(metadata_columns))) + ")" + print(create_sql) + print(insert_sql) + + db.execute(create_sql) + + for idx, (payload, vector) in enumerate( + tqdm(zip(payloads, vectors), total=len(payloads)) + ): + params = [idx, vector] + for c in metadata_columns: + params.append(payload[c]) + db.execute(insert_sql, params) + + print(time.time() - t0) + + +def tests_command(file_path): + file_path = Path(file_path) + db = sqlite3.connect(f"{file_path.stem}.db") + db.execute("PRAGMA cache_size = -100000000") + db.row_factory = sqlite3.Row + db.enable_load_extension(True) + db.load_extension("../../dist/vec0") + db.enable_load_extension(False) + + tests = [ + json.loads(row["data"]) + for row in db.execute("select data from tests").fetchall() + ] + + num_or_skips = 0 + num_1off_errors = 0 + + t0 = time.time() + print("testing...") + for idx, test in enumerate(tqdm(tests)): + query = test["query"] + conditions = test["conditions"] + expected_closest_ids = test["closest_ids"] + expected_closest_scores = test["closest_scores"] + + sql = "select rowid, 1 - distance as similarity from v where vector match ? 
and k = ?" + params = [serialize_float32(query), len(expected_closest_ids)] + + if "and" in conditions: + for condition in conditions["and"]: + assert len(condition.keys()) == 1 + column = list(condition.keys())[0] + assert len(list(condition[column].keys())) == 1 + condition_type = list(condition[column].keys())[0] + if condition_type == "match": + value = condition[column]["match"]["value"] + sql += f" and {column} = ?" + params.append(value) + elif condition_type == "range": + sql += f" and {column} between ? and ?" + params.append(condition[column]["range"]["gt"]) + params.append(condition[column]["range"]["lt"]) + else: + raise Exception(f"Unknown condition type: {condition_type}") + elif "or" in conditions: + column = list(conditions["or"][0].keys())[0] + condition_type = list(conditions["or"][0][column].keys())[0] + assert condition_type == "match" + sql += f" and {column} in (" + for idx, condition in enumerate(conditions["or"]): + if condition_type == "match": + value = condition[column]["match"]["value"] + if idx != 0: + sql += "," + sql += "?" 
+ params.append(value) + elif condition_type == "range": + breakpoint() + else: + raise Exception(f"Unknown condition type: {condition_type}") + sql += ")" + + # print(sql, params[1:]) + rows = db.execute(sql, params).fetchall() + actual_closest_ids = [row["rowid"] for row in rows] + matches = expected_closest_ids == actual_closest_ids + if not matches: + diff = DeepDiff( + expected_closest_ids, actual_closest_ids, ignore_order=False + ) + assert len(list(diff.keys())) == 1 + assert "values_changed" in diff.keys() + keys_changed = list(diff["values_changed"].keys()) + if len(keys_changed) == 2: + akey, bkey = keys_changed + a = int(akey.lstrip("root[").rstrip("]")) + b = int(bkey.lstrip("root[").rstrip("]")) + assert abs(a - b) == 1 + assert ( + diff["values_changed"][akey]["new_value"] + == diff["values_changed"][bkey]["old_value"] + ) + assert ( + diff["values_changed"][akey]["old_value"] + == diff["values_changed"][bkey]["new_value"] + ) + elif len(keys_changed) == 1: + v = int(keys_changed[0].lstrip("root[").rstrip("]")) + assert (v + 1) == len(expected_closest_ids) + else: + raise Exception("fuck") + num_1off_errors += 1 + # print(closest_scores) + # print([row["similarity"] for row in rows]) + # assert closest_scores == [row["similarity"] for row in rows] + print("Number skipped: ", num_or_skips) + print("Num 1 off errors: ", num_1off_errors) + print("1 off error rate: ", num_1off_errors / (len(tests) - num_or_skips)) + print(time.time() - t0) + print("done") + + +def main(): + parser = argparse.ArgumentParser(description="CLI tool") + subparsers = parser.add_subparsers(dest="command", required=True) + + build_parser = subparsers.add_parser("build") + build_parser.add_argument("file", type=str, help="Path to input file") + build_parser.add_argument("--metadata", type=str, help="Metadata columns") + build_parser.set_defaults(func=lambda args: build_command(args.file, args.metadata)) + + tests_parser = subparsers.add_parser("test") + 
tests_parser.add_argument("file", type=str, help="Path to input file") + tests_parser.set_defaults(func=lambda args: tests_command(args.file)) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/tests/test-auxiliary.py b/tests/test-auxiliary.py index 9f03cbd..d1f5f56 100644 --- a/tests/test-auxiliary.py +++ b/tests/test-auxiliary.py @@ -55,7 +55,10 @@ def test_types(db, snapshot): ) assert exec(db, "select * from v") == snapshot() + # TODO: integrity test transaction failures in shadow tables + db.commit() # bad types + db.execute("BEGIN") assert ( exec(db, INSERT, [b"\x11\x11\x11\x11", "not int", 1.2, "text", b"blob"]) == snapshot() @@ -66,6 +69,7 @@ def test_types(db, snapshot): ) assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 1.2, 1, b"blob"]) == snapshot() assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 1.2, "text", 1]) == snapshot() + db.execute("ROLLBACK") # NULLs are totally chill assert exec(db, INSERT, [b"\x11\x11\x11\x11", None, None, None, None]) == snapshot() @@ -151,5 +155,7 @@ def vec0_shadow_table_contents(db, v): ] o = {} for shadow_table in shadow_tables: + if shadow_table.endswith("_info"): + continue o[shadow_table] = exec(db, f"select * from {shadow_table}") return o diff --git a/tests/test-general.py b/tests/test-general.py new file mode 100644 index 0000000..2e14cae --- /dev/null +++ b/tests/test-general.py @@ -0,0 +1,60 @@ +import sqlite3 +from collections import OrderedDict +import pytest + + +@pytest.mark.skipif( + sqlite3.sqlite_version_info[1] < 37, + reason="pragma_table_list was added in SQLite 3.37", +) +def test_shadow(db, snapshot): + db.execute( + "create virtual table v using vec0(a float[1], partition text partition key, metadata text, +name text, chunk_size=8)" + ) + assert exec(db, "select * from sqlite_master order by name") == snapshot() + assert ( + exec(db, "select * from pragma_table_list where type = 'shadow'") == snapshot() + ) + + db.execute("drop table v;") + assert 
( + exec(db, "select * from pragma_table_list where type = 'shadow'") == snapshot() + ) + + +def test_info(db, snapshot): + db.execute("create virtual table v using vec0(a float[1])") + assert exec(db, "select key, typeof(value) from v_info order by 1") == snapshot() + + +def exec(db, sql, parameters=[]): + try: + rows = db.execute(sql, parameters).fetchall() + except (sqlite3.OperationalError, sqlite3.DatabaseError) as e: + return { + "error": e.__class__.__name__, + "message": str(e), + } + a = [] + for row in rows: + o = OrderedDict() + for k in row.keys(): + o[k] = row[k] + a.append(o) + result = OrderedDict() + result["sql"] = sql + result["rows"] = a + return result + + +def vec0_shadow_table_contents(db, v): + shadow_tables = [ + row[0] + for row in db.execute( + "select name from sqlite_master where name like ? order by 1", [f"{v}_%"] + ).fetchall() + ] + o = {} + for shadow_table in shadow_tables: + o[shadow_table] = exec(db, f"select * from {shadow_table}") + return o diff --git a/tests/test-loadable.py b/tests/test-loadable.py index 30171fe..b14902a 100644 --- a/tests/test-loadable.py +++ b/tests/test-loadable.py @@ -1022,6 +1022,7 @@ def test_vec0_drops(): ] == [ "t1", "t1_chunks", + "t1_info", "t1_rowids", "t1_vector_chunks00", "t1_vector_chunks01", @@ -2216,6 +2217,9 @@ def test_smoke(): { "name": "vec_xyz_chunks", }, + { + "name": "vec_xyz_info", + }, { "name": "vec_xyz_rowids", }, diff --git a/tests/test-metadata.py b/tests/test-metadata.py new file mode 100644 index 0000000..3c2e542 --- /dev/null +++ b/tests/test-metadata.py @@ -0,0 +1,629 @@ +import pytest +import sqlite3 +from collections import OrderedDict +import json + + +def test_constructor_limit(db, snapshot): + assert exec( + db, + f""" + create virtual table v using vec0( + {",".join([f"metadata{x} integer" for x in range(17)])} + v float[1] + ) + """, + ) == snapshot(name="max 16 metadata columns") + + +def test_normal(db, snapshot): + db.execute( + "create virtual table v using 
vec0(vector float[1], b boolean, n int, f float, t text, chunk_size=8)" + ) + assert exec( + db, "select * from sqlite_master where type = 'table' order by name" + ) == snapshot(name="sqlite_master") + + assert vec0_shadow_table_contents(db, "v") == snapshot() + + INSERT = "insert into v(vector, b, n, f, t) values (?, ?, ?, ?, ?)" + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 1, 1.1, "one"]) == snapshot() + assert exec(db, INSERT, [b"\x22\x22\x22\x22", 1, 2, 2.2, "two"]) == snapshot() + assert exec(db, INSERT, [b"\x33\x33\x33\x33", 1, 3, 3.3, "three"]) == snapshot() + + assert exec(db, "select * from v") == snapshot() + assert vec0_shadow_table_contents(db, "v") == snapshot() + + assert exec(db, "drop table v") == snapshot() + assert exec(db, "select * from sqlite_master") == snapshot() + + +# +# assert exec(db, "select * from v") == snapshot() +# assert vec0_shadow_table_contents(db, "v") == snapshot() +# +# db.execute("drop table v;") +# assert exec(db, "select * from sqlite_master order by name") == snapshot( +# name="sqlite_master post drop" +# ) + + +def test_text_knn(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], name text, chunk_size=8)" + ) + assert vec0_shadow_table_contents(db, "v") == snapshot() + INSERT = "insert into v(vector, name) values (?, ?)" + db.execute( + """ + INSERT INTO v(vector, name) VALUES + ('[.11]', 'aaa'), + ('[.22]', 'bbb'), + ('[.33]', 'ccc'), + ('[.44]', 'ddd'), + ('[.55]', 'eee'), + ('[.66]', 'fff'), + ('[.77]', 'ggg'), + ('[.88]', 'hhh'), + ('[.99]', 'iii'); + """ + ) + assert exec(db, "select * from v") == snapshot() + assert vec0_shadow_table_contents(db, "v") == snapshot() + + assert ( + exec( + db, + "select rowid, name, distance from v where vector match '[1]' and k = 5", + ) + == snapshot() + ) + + assert ( + exec( + db, + "select rowid, name, distance from v where vector match '[1]' and k = 5 and name < 'ddd'", + ) + == snapshot() + ) + assert ( + exec( + db, + "select rowid, name, 
distance from v where vector match '[1]' and k = 5 and name <= 'ddd'", + ) + == snapshot() + ) + assert ( + exec( + db, + "select rowid, name, distance from v where vector match '[1]' and k = 5 and name > 'fff'", + ) + == snapshot() + ) + assert ( + exec( + db, + "select rowid, name, distance from v where vector match '[1]' and k = 5 and name >= 'fff'", + ) + == snapshot() + ) + assert ( + exec( + db, + "select rowid, name, distance from v where vector match '[1]' and k = 5 and name = 'aaa'", + ) + == snapshot() + ) + assert ( + exec( + db, + "select rowid, name, distance from v where vector match '[.01]' and k = 5 and name != 'aaa'", + ) + == snapshot() + ) + + +def test_long_text_updates(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], name text, chunk_size=8)" + ) + assert vec0_shadow_table_contents(db, "v") == snapshot() + INSERT = "insert into v(vector, name) values (?, ?)" + exec(db, INSERT, [b"\x11\x11\x11\x11", "123456789a12"]) + exec(db, INSERT, [b"\x11\x11\x11\x11", "123456789a123"]) + assert exec(db, "select * from v") == snapshot() + assert vec0_shadow_table_contents(db, "v") == snapshot() + + +def test_long_text_knn(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], name text, chunk_size=8)" + ) + INSERT = "insert into v(vector, name) values (?, ?)" + exec(db, INSERT, ["[1]", "aaaa"]) + exec(db, INSERT, ["[2]", "aaaaaaaaaaaa_aaa"]) + exec(db, INSERT, ["[3]", "bbbb"]) + exec(db, INSERT, ["[4]", "bbbbbbbbbbbb_bbb"]) + exec(db, INSERT, ["[5]", "cccc"]) + exec(db, INSERT, ["[6]", "cccccccccccc_ccc"]) + + tests = [ + "bbbb", + "bb", + "bbbbbb", + "bbbbbbbbbbbb_bbb", + "bbbbbbbbbbbb_aaa", + "bbbbbbbbbbbb_ccc", + "longlonglonglonglonglonglong", + ] + ops = ["=", "!=", "<", "<=", ">", ">="] + op_names = ["eq", "ne", "lt", "le", "gt", "ge"] + + for test in tests: + for op, op_name in zip(ops, op_names): + assert exec( + db, + f"select rowid, name, distance from v where vector match '[100]' and k 
= 5 and name {op} ?", + [test], + ) == snapshot(name=f"{op_name}-{test}") + + +def test_types(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], b boolean, n int, f float, t text, chunk_size=8)" + ) + INSERT = "insert into v(vector, b, n, f, t) values (?, ?, ?, ?, ?)" + + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 1, 1.1, "test"]) == snapshot( + name="legal" + ) + + # fmt: off + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 'illegal', 1, 1.1, 'test']) == snapshot(name="illegal-type-boolean") + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 'illegal', 1.1, 'test']) == snapshot(name="illegal-type-int") + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 1, 'illegal', 'test']) == snapshot(name="illegal-type-float") + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 1, 1.1, 420]) == snapshot(name="illegal-type-text") + # fmt: on + + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 44, 1, 1.1, "test"]) == snapshot( + name="illegal-boolean" + ) + + +def test_updates(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], b boolean, n int, f float, t text, chunk_size=8)" + ) + INSERT = "insert into v(rowid, vector, b, n, f, t) values (?, ?, ?, ?, ?, ?)" + + exec(db, INSERT, [1, b"\x11\x11\x11\x11", 1, 1, 1.1, "test1"]) + exec(db, INSERT, [2, b"\x22\x22\x22\x22", 1, 2, 2.2, "test2"]) + exec(db, INSERT, [3, b"\x33\x33\x33\x33", 1, 3, 3.3, "1234567890123"]) + assert exec(db, "select * from v") == snapshot(name="1-init-contents") + assert vec0_shadow_table_contents(db, "v") == snapshot(name="1-init-shadow") + + assert exec( + db, "UPDATE v SET b = 0, n = 11, f = 11.11, t = 'newtest1' where rowid = 1" + ) + assert exec(db, "select * from v") == snapshot(name="general-update-contents") + assert vec0_shadow_table_contents(db, "v") == snapshot( + name="general-update-shaodnw" + ) + + # string update #1: long string updated to long string + exec(db, "UPDATE v SET t = '1234567890123-updated' where rowid = 3") + assert 
exec(db, "select * from v") == snapshot(name="string-update-1-contents") + assert vec0_shadow_table_contents(db, "v") == snapshot( + name="string-update-1-shadow" + ) + + # string update #2: short string updated to short string + exec(db, "UPDATE v SET t = 'test2-short' where rowid = 2") + assert exec(db, "select * from v") == snapshot(name="string-update-2-contents") + assert vec0_shadow_table_contents(db, "v") == snapshot( + name="string-update-2-shadow" + ) + + # string update #3: short string updated to long string + exec(db, "UPDATE v SET t = 'test2-long-long-long' where rowid = 2") + assert exec(db, "select * from v") == snapshot(name="string-update-3-contents") + assert vec0_shadow_table_contents(db, "v") == snapshot( + name="string-update-3-shadow" + ) + + # string update #4: long string updated to short string + exec(db, "UPDATE v SET t = 'test2-shortx' where rowid = 2") + assert exec(db, "select * from v") == snapshot(name="string-update-4-contents") + assert vec0_shadow_table_contents(db, "v") == snapshot( + name="string-update-4-shadow" + ) + + +def test_deletes(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], b boolean, n int, f float, t text, chunk_size=8)" + ) + INSERT = "insert into v(rowid, vector, b, n, f, t) values (?, ?, ?, ?, ?, ?)" + + assert exec(db, INSERT, [1, b"\x11\x11\x11\x11", 1, 1, 1.1, "test1"]) == snapshot() + assert exec(db, INSERT, [2, b"\x22\x22\x22\x22", 1, 2, 2.2, "test2"]) == snapshot() + assert ( + exec(db, INSERT, [3, b"\x33\x33\x33\x33", 1, 3, 3.3, "1234567890123"]) + == snapshot() + ) + + assert exec(db, "select * from v") == snapshot() + assert vec0_shadow_table_contents(db, "v") == snapshot() + + assert exec(db, "DELETE FROM v where rowid = 1") == snapshot() + assert exec(db, "select * from v") == snapshot() + assert vec0_shadow_table_contents(db, "v") == snapshot() + + assert exec(db, "DELETE FROM v where rowid = 3") == snapshot() + assert exec(db, "select * from v") == snapshot() + 
assert vec0_shadow_table_contents(db, "v") == snapshot() + + +def test_knn(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], name text, chunk_size=8)" + ) + assert exec( + db, "select * from sqlite_master where type = 'table' order by name" + ) == snapshot(name="sqlite_master") + db.executemany( + "insert into v(vector, name) values (?, ?)", + [("[1]", "alex"), ("[2]", "brian"), ("[3]", "craig")], + ) + + # EVIDENCE-OF: V16511_00582 catches "illegal" constraints on metadata columns + assert ( + exec( + db, + "select *, distance from v where vector match '[5]' and k = 3 and name like 'illegal'", + ) + == snapshot() + ) + + +SUPPORTS_VTAB_IN = sqlite3.sqlite_version_info[1] >= 38 + + +@pytest.mark.skipif( + not SUPPORTS_VTAB_IN, reason="requires vtab `x in (...)` support in SQLite >=3.38" +) +def test_vtab_in(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], n int, t text, b boolean, f float, chunk_size=8)" + ) + db.executemany( + "insert into v(rowid, vector, n, t, b, f) values (?, ?, ?, ?, ?, ?)", + [ + (1, "[1]", 999, "aaaa", 0, 1.1), + (2, "[2]", 555, "aaaa", 0, 1.1), + (3, "[3]", 999, "aaaa", 0, 1.1), + (4, "[4]", 555, "aaaa", 0, 1.1), + (5, "[5]", 999, "zzzz", 0, 1.1), + (6, "[6]", 555, "zzzz", 0, 1.1), + (7, "[7]", 999, "zzzz", 0, 1.1), + (8, "[8]", 555, "zzzz", 0, 1.1), + ], + ) + + # EVIDENCE-OF: V15248_32086 + assert exec( + db, "select * from v where vector match '[0]' and k = 8 and b in (1, 0)" + ) == snapshot(name="block-bool") + + assert exec( + db, "select * from v where vector match '[0]' and k = 8 and f in (1.1, 0.0)" + ) == snapshot(name="block-float") + + assert exec( + db, + "select rowid, n, distance from v where vector match '[0]' and k = 8 and n in (555, 999)", + ) == snapshot(name="allow-int-all") + assert exec( + db, + "select rowid, n, distance from v where vector match '[0]' and k = 8 and n in (555, -1, -2)", + ) == snapshot(name="allow-int-superfluous") + + assert exec( + 
db, + "select rowid, t, distance from v where vector match '[0]' and k = 8 and t in ('aaaa', 'zzzz')", + ) == snapshot(name="allow-text-all") + assert exec( + db, + "select rowid, t, distance from v where vector match '[0]' and k = 8 and t in ('aaaa', 'foo', 'bar')", + ) == snapshot(name="allow-text-superfluous") + + +def test_vtab_in_long_text(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], t text, chunk_size=8)" + ) + data = [ + (1, "aaaa"), + (2, "aaaaaaaaaaaa_aaa"), + (3, "bbbb"), + (4, "bbbbbbbbbbbb_bbb"), + (5, "cccc"), + (6, "cccccccccccc_ccc"), + ] + db.executemany( + "insert into v(rowid, vector, t) values (:rowid, printf('[%d]', :rowid), :vector)", + [{"rowid": row[0], "vector": row[1]} for row in data], + ) + + for _, lookup in data: + assert exec( + db, + "select rowid, t from v where vector match '[0]' and k = 10 and t in (?, 'nonsense')", + [lookup], + ) == snapshot(name=f"individual-{lookup}") + + assert exec( + db, + "select rowid, t from v where vector match '[0]' and k = 10 and t in (select value from json_each(?))", + [json.dumps([row[1] for row in data])], + ) == snapshot(name="all") + + +def test_idxstr(db, snapshot): + db.execute( + """ + create virtual table vec_movies using vec0( + movie_id integer primary key, + synopsis_embedding float[1], + +title text, + is_favorited boolean, + genre text, + num_reviews int, + mean_rating float, + chunk_size=8 + ); + """ + ) + + assert ( + eqp( + db, + "select * from vec_movies where synopsis_embedding match '' and k = 0 and is_favorited = true", + ) + == snapshot() + ) + + ops = ["<", ">", "<=", ">=", "!="] + + for op in ops: + assert eqp( + db, + f"select * from vec_movies where synopsis_embedding match '' and k = 0 and genre {op} NULL", + ) == snapshot(name=f"knn-constraint-text {op}") + + for op in ops: + assert eqp( + db, + f"select * from vec_movies where synopsis_embedding match '' and k = 0 and num_reviews {op} NULL", + ) == snapshot(name=f"knn-constraint-int 
{op}") + + for op in ops: + assert eqp( + db, + f"select * from vec_movies where synopsis_embedding match '' and k = 0 and mean_rating {op} NULL", + ) == snapshot(name=f"knn-constraint-float {op}") + + # for op in ops: + # assert eqp( + # db, + # f"select * from vec_movies where synopsis_embedding match '' and k = 0 and is_favorited {op} NULL", + # ) == snapshot(name=f"knn-constraint-boolean {op}") + + +def eqp(db, sql): + o = OrderedDict() + o["sql"] = sql + o["plan"] = [ + dict(row) for row in db.execute(f"explain query plan {sql}").fetchall() + ] + for p in o["plan"]: + # value is different on macos-aarch64 in github actions, not sure why + del p["notused"] + return o + + +def test_stress(db, snapshot): + db.execute( + """ + create virtual table vec_movies using vec0( + movie_id integer primary key, + synopsis_embedding float[1], + +title text, + is_favorited boolean, + genre text, + num_reviews int, + mean_rating float, + chunk_size=8 + ); + """ + ) + + db.execute( + """ + INSERT INTO vec_movies(movie_id, synopsis_embedding, is_favorited, genre, title, num_reviews, mean_rating) + VALUES + (1, '[1]', 0, 'horror', 'The Conjuring', 153, 4.6), + (2, '[2]', 0, 'comedy', 'Dumb and Dumber', 382, 2.6), + (3, '[3]', 0, 'scifi', 'Interstellar', 53, 5.0), + (4, '[4]', 0, 'fantasy', 'The Lord of the Rings: The Fellowship of the Ring', 210, 4.2), + (5, '[5]', 1, 'documentary', 'An Inconvenient Truth', 93, 3.4), + (6, '[6]', 1, 'horror', 'Hereditary', 167, 4.7), + (7, '[7]', 1, 'comedy', 'Anchorman: The Legend of Ron Burgundy', 482, 2.9), + (8, '[8]', 0, 'scifi', 'Blade Runner 2049', 301, 5.0), + (9, '[9]', 1, 'fantasy', 'Harry Potter and the Sorcerer''s Stone', 134, 4.1), + (10, '[10]', 0, 'documentary', 'Free Solo', 66, 3.2), + (11, '[11]', 1, 'horror', 'Get Out', 88, 4.9), + (12, '[12]', 0, 'comedy', 'The Hangover', 59, 2.8), + (13, '[13]', 1, 'scifi', 'The Matrix', 423, 4.5), + (14, '[14]', 0, 'fantasy', 'Pan''s Labyrinth', 275, 3.6), + (15, '[15]', 1, 'documentary', 
'13th', 191, 4.4), + (16, '[16]', 0, 'horror', 'It Follows', 314, 4.3), + (17, '[17]', 1, 'comedy', 'Step Brothers', 74, 3.0), + (18, '[18]', 1, 'scifi', 'Inception', 201, 5.0), + (19, '[19]', 1, 'fantasy', 'The Shape of Water', 399, 2.7), + (20, '[20]', 1, 'documentary', 'Won''t You Be My Neighbor?', 186, 4.8), + (21, '[21]', 1, 'scifi', 'Gravity', 342, 4.0), + (22, '[22]', 1, 'scifi', 'Dune', 451, 4.4), + (23, '[23]', 1, 'scifi', 'The Martian', 522, 4.6), + (24, '[24]', 1, 'horror', 'A Quiet Place', 271, 4.3), + (25, '[25]', 1, 'fantasy', 'The Chronicles of Narnia: The Lion, the Witch and the Wardrobe', 310, 3.9); + + """ + ) + + assert vec0_shadow_table_contents(db, "vec_movies") == snapshot() + assert ( + exec( + db, + """ + select + movie_id, + title, + genre, + num_reviews, + mean_rating, + is_favorited, + distance + from vec_movies + where synopsis_embedding match '[15.5]' + and genre = 'scifi' + and num_reviews between 100 and 500 + and mean_rating > 3.5 + and k = 5; + """, + ) + == snapshot() + ) + + assert ( + exec( + db, + "select movie_id, genre, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and genre = 'horror'", + ) + == snapshot() + ) + assert ( + exec( + db, + "select movie_id, genre, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and genre = 'comedy'", + ) + == snapshot() + ) + assert ( + exec( + db, + "select movie_id, num_reviews, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and num_reviews between 100 and 500", + ) + == snapshot() + ) + assert ( + exec( + db, + "select movie_id, num_reviews, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and num_reviews >= 500", + ) + == snapshot() + ) + assert ( + exec( + db, + "select movie_id, mean_rating, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and mean_rating < 3.0", + ) + == snapshot() + ) + assert ( + exec( + db, + "select movie_id, mean_rating, distance from 
vec_movies where synopsis_embedding match '[100]' and k = 5 and mean_rating between 4.0 and 5.0", + ) + == snapshot() + ) + + assert exec( + db, + "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited = TRUE", + ) == snapshot(name="bool-eq-true") + assert exec( + db, + "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited != TRUE", + ) == snapshot(name="bool-ne-true") + assert exec( + db, + "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited = FALSE", + ) == snapshot(name="bool-eq-false") + assert exec( + db, + "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited != FALSE", + ) == snapshot(name="bool-ne-false") + + # EVIDENCE-OF: V10145_26984 + assert exec( + db, + "select movie_id, is_favorited, distance from vec_movies where synopsis_embedding match '[100]' and k = 5 and is_favorited >= 999", + ) == snapshot(name="bool-other-op") + + +def test_errors(db, snapshot): + db.execute("create virtual table v using vec0(vector float[1], t text)") + db.execute("insert into v(vector, t) values ('[1]', 'aaaaaaaaaaaax')") + + assert exec(db, "select * from v") == snapshot() + + # EVIDENCE-OF: V15466_32305 + db.set_authorizer( + authorizer_deny_on(sqlite3.SQLITE_READ, "v_metadatatext00", "data") + ) + assert exec(db, "select * from v") == snapshot() + + +def authorizer_deny_on(operation, x1, x2=None): + def _auth(op, p1, p2, p3, p4): + if op == operation and p1 == x1 and p2 == x2: + return sqlite3.SQLITE_DENY + return sqlite3.SQLITE_OK + + return _auth + + +def exec(db, sql, parameters=[]): + try: + rows = db.execute(sql, parameters).fetchall() + except (sqlite3.OperationalError, sqlite3.DatabaseError) as e: + return { + "error": e.__class__.__name__, + "message": str(e), + } + a = [] + for row in 
rows: + o = OrderedDict() + for k in row.keys(): + o[k] = row[k] + a.append(o) + result = OrderedDict() + result["sql"] = sql + result["rows"] = a + return result + + +def vec0_shadow_table_contents(db, v): + shadow_tables = [ + row[0] + for row in db.execute( + "select name from sqlite_master where name like ? order by 1", [f"{v}_%"] + ).fetchall() + ] + o = {} + for shadow_table in shadow_tables: + if shadow_table.endswith("_info"): + continue + o[shadow_table] = exec(db, f"select * from {shadow_table}") + return o diff --git a/tests/test-partition-keys.py b/tests/test-partition-keys.py index 41c7671..fee3560 100644 --- a/tests/test-partition-keys.py +++ b/tests/test-partition-keys.py @@ -111,5 +111,7 @@ def vec0_shadow_table_contents(db, v): ] o = {} for shadow_table in shadow_tables: + if shadow_table.endswith("_info"): + continue o[shadow_table] = exec(db, f"select * from {shadow_table}") return o