diff --git a/sqlite-vec.c b/sqlite-vec.c index 3dedf37..f2c01c4 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -5960,17 +5960,37 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * goto done; } bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0); - } break; } case VEC0_METADATA_OPERATOR_GT: { for(int i = 0; i < size; i++) { - u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; - int n = ((int*) view)[0]; - char * s = (char *) &view[4]; - if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {rc = SQLITE_ERROR;goto done;} /* TODO */ - bitmap_set(b, i, strncmp(s, sTarget, n) > 0); + view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + nPrefix = ((int*) view)[0]; + sPrefix = (char *) &view[4]; + int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)); + + // for short strings, use the prefix comparison directly + if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) { + bitmap_set(b, i, cmpPrefix > 0); + continue; + } + + // for GT, only need to consult full string if EQ + if(cmpPrefix != 0) { + bitmap_set(b, i, cmpPrefix > 0); + continue; + } + + rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull); + if(rc != SQLITE_OK) { + goto done; + } + if(nPrefix != nFull) { + rc = SQLITE_ERROR; + goto done; + } + bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0); } break; } diff --git a/test.sql b/test.sql index 93bb20a..ec27623 100644 --- a/test.sql +++ b/test.sql @@ -4,6 +4,21 @@ .mode qbox + +create table t as select value from json_each('["aaaa","aaaaaaaaaaaa_aaa","bbbb","bbbbbbbbbbbb_bbb","cccc","cccccccccccc_ccc"]') order by 1; + + +.param set :p 'bbbbbbbbbbbb_ccc' + +select :p; + +select * +from t +where value > :p +order by value desc; + +.exit + create virtual table v using vec0( vector float[1], +description text diff --git a/tests/__snapshots__/test-metadata.ambr b/tests/__snapshots__/test-metadata.ambr index d9892f5..33d443c 100644 
--- a/tests/__snapshots__/test-metadata.ambr +++ b/tests/__snapshots__/test-metadata.ambr @@ -693,33 +693,88 @@ }) # --- # name: test_long_text_knn[gt-bbbb] - dict({ - 'error': 'OperationalError', - 'message': 'Could not filter metadata fields', + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), }) # --- # name: test_long_text_knn[gt-bbbbbbbbbbbb_aaa] - dict({ - 'error': 'OperationalError', - 'message': 'Could not filter metadata fields', + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + OrderedDict({ + 'rowid': 4, + 'name': 'bbbbbbbbbbbb_bbb', + 'distance': 96.0, + }), + ]), }) # --- # name: test_long_text_knn[gt-bbbbbbbbbbbb_bbb] - dict({ - 'error': 'OperationalError', - 'message': 'Could not filter metadata fields', + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 'name': 'cccc', + 'distance': 95.0, + }), + ]), }) # --- # name: test_long_text_knn[gt-bbbbbbbbbbbb_ccc] - dict({ - 'error': 'OperationalError', - 'message': 'Could not filter metadata fields', + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + OrderedDict({ + 'rowid': 6, + 'name': 'cccccccccccc_ccc', + 'distance': 94.0, + }), + OrderedDict({ + 'rowid': 5, + 
'name': 'cccc', + 'distance': 95.0, + }), + ]), }) # --- # name: test_long_text_knn[gt-longlonglonglonglonglonglong] - dict({ - 'error': 'OperationalError', - 'message': 'Could not filter metadata fields', + OrderedDict({ + 'sql': "select rowid, name, distance from v where vector match '[100]' and k = 5 and name > ?", + 'rows': list([ + ]), }) # --- # name: test_long_text_knn[le-bbbb]