This commit is contained in:
Alex Garcia 2024-11-18 11:56:47 -08:00
parent b527a3d9ec
commit 07793b3ecc
2 changed files with 48 additions and 40 deletions

9
TODO
View file

@ -14,11 +14,12 @@
# metadata filtering # metadata filtering
- text comparisons (long) - text comparisons (long)
- skip invalid validity entries in knn filter? - `v in (...)` handling
- null!
- date/datetime
- [ ] test accessing aux values when rowid is different than 1,2,3 etc. - [ ] test accessing aux values when rowid is different than 1,2,3 etc.
- [ ] add `xyz_info` shadow table with version etc.
- later - later
- `v in (...)` handling - null!
- date/datetime
- remaining TODO items - remaining TODO items
- skip invalid validity entries in knn filter?
- dictionary encoding? - dictionary encoding?

View file

@ -3433,7 +3433,7 @@ typedef struct vec0_vtab vec0_vtab;
#define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192 #define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192
#define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16 #define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16
#define VEC0_METADATA_TEXT_VIEW_SIZE_LENGTH 16 #define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12
typedef enum { typedef enum {
// vector column, ie "contents_embedding float[1024]" // vector column, ie "contents_embedding float[1024]"
@ -4091,7 +4091,7 @@ int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_i
goto done; goto done;
} }
int length = ((int *)view)[0]; int length = ((int *)view)[0];
if(length <= 12) { if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
sqlite3_result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT); sqlite3_result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT);
} }
else { else {
@ -5857,8 +5857,8 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
sqlite3_stmt * stmt = NULL; sqlite3_stmt * stmt = NULL;
i64 * rowids = NULL; i64 * rowids = NULL;
sqlite3_blob * rowidsBlob; sqlite3_blob * rowidsBlob;
const char * target = (const char *) sqlite3_value_text(value); const char * sTarget = (const char *) sqlite3_value_text(value);
int targetn = sqlite3_value_bytes(value); int nTarget = sqlite3_value_bytes(value);
// TODO(perf): only text metadata news the rowids BLOB. Make it so that // TODO(perf): only text metadata news the rowids BLOB. Make it so that
@ -5885,34 +5885,41 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
sqlite3_blob_close(rowidsBlob); sqlite3_blob_close(rowidsBlob);
switch(op) { switch(op) {
char *sFull;
int nFull;
case VEC0_METADATA_OPERATOR_EQ: { case VEC0_METADATA_OPERATOR_EQ: {
for(int i = 0; i < size; i++) { for(int i = 0; i < size; i++) {
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
int n = ((int*) view)[0]; int nPrefix = ((int*) view)[0];
char * s = (char *) &view[4]; char * sPrefix = (char *) &view[4];
if(n != targetn) {
// for EQ the text lengths must match
if(nPrefix != nTarget) {
bitmap_set(b, i, 0); bitmap_set(b, i, 0);
continue; continue;
} }
int prefix_cmp = strncmp(s, target, min(n, 12)); int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
if(n <= 12) {
bitmap_set(b, i, prefix_cmp == 0); // for short strings, use the prefix comparison direclty
if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
bitmap_set(b, i, cmpPrefix == 0);
continue;
} }
// if the prefix doesnt match, the rest of the string wont match // for EQ on longs strings, the prefix must match
else if(prefix_cmp) { if(cmpPrefix) {
bitmap_set(b, i, 0); bitmap_set(b, i, 0);
continue;
} }
// need to consult // consult the full string
else { rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
char *slong;
int slongn;
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &slongn, &slong);
if(rc != SQLITE_OK) { if(rc != SQLITE_OK) {
goto done; goto done;
} }
assert(n == slongn); if(nPrefix != nFull) {
bitmap_set(b, i, strncmp(slong, target, n) == 0); rc = SQLITE_ERROR;
goto done;
} }
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0);
} }
break; break;
} }
@ -5921,8 +5928,8 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
int n = ((int*) view)[0]; int n = ((int*) view)[0];
char * s = (char *) &view[4]; char * s = (char *) &view[4];
if(n > 12) {rc = SQLITE_ERROR;goto done;} /* TODO */ if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {rc = SQLITE_ERROR;goto done;} /* TODO */
bitmap_set(b, i, strncmp(s, target, n) != 0); bitmap_set(b, i, strncmp(s, sTarget, n) != 0);
} }
break; break;
} }
@ -5931,8 +5938,8 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
int n = ((int*) view)[0]; int n = ((int*) view)[0];
char * s = (char *) &view[4]; char * s = (char *) &view[4];
if(n > 12) {rc = SQLITE_ERROR;goto done;} /* TODO */ if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {rc = SQLITE_ERROR;goto done;} /* TODO */
bitmap_set(b, i, strncmp(s, target, n) > 0); bitmap_set(b, i, strncmp(s, sTarget, n) > 0);
} }
break; break;
} }
@ -5941,8 +5948,8 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
int n = ((int*) view)[0]; int n = ((int*) view)[0];
char * s = (char *) &view[4]; char * s = (char *) &view[4];
if(n > 12) {rc = SQLITE_ERROR;goto done;} /* TODO */ if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {rc = SQLITE_ERROR;goto done;} /* TODO */
bitmap_set(b, i, strncmp(s, target, n) >= 0); bitmap_set(b, i, strncmp(s, sTarget, n) >= 0);
} }
break; break;
} }
@ -5951,8 +5958,8 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
int n = ((int*) view)[0]; int n = ((int*) view)[0];
char * s = (char *) &view[4]; char * s = (char *) &view[4];
if(n > 12) {rc = SQLITE_ERROR;goto done;} /* TODO */ if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {rc = SQLITE_ERROR;goto done;} /* TODO */
bitmap_set(b, i, strncmp(s, target, n) <= 0); bitmap_set(b, i, strncmp(s, sTarget, n) <= 0);
} }
break; break;
} }
@ -5961,8 +5968,8 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
int n = ((int*) view)[0]; int n = ((int*) view)[0];
char * s = (char *) &view[4]; char * s = (char *) &view[4];
if(n > 12) {rc = SQLITE_ERROR;goto done;} /* TODO */ if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {rc = SQLITE_ERROR;goto done;} /* TODO */
bitmap_set(b, i, strncmp(s, target, n) < 0); bitmap_set(b, i, strncmp(s, sTarget, n) < 0);
} }
break; break;
} }
@ -7572,10 +7579,10 @@ int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid,
memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4)); memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4));
rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
if(n > 12) { if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
const char * zSql; const char * zSql;
if(isupdate && (prev_n > 12)) { if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)) {
zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx); zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx);
}else { }else {
zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx); zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx);
@ -7599,7 +7606,7 @@ int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid,
goto done; goto done;
} }
} }
else if(prev_n > 12) { else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx); const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx);
if(!zSql) { if(!zSql) {
rc = SQLITE_NOMEM; rc = SQLITE_NOMEM;
@ -8038,7 +8045,7 @@ int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i
goto done; goto done;
} }
if(n > 12) { if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx); const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
if(!zSql) { if(!zSql) {
rc = SQLITE_NOMEM; rc = SQLITE_NOMEM;