mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-26 01:06:27 +02:00
text knn NE
This commit is contained in:
parent
31622209eb
commit
4ba167c315
3 changed files with 185 additions and 24 deletions
47
sqlite-vec.c
47
sqlite-vec.c
|
|
@ -5885,13 +5885,16 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
|
||||||
sqlite3_blob_close(rowidsBlob);
|
sqlite3_blob_close(rowidsBlob);
|
||||||
|
|
||||||
switch(op) {
|
switch(op) {
|
||||||
|
int nPrefix;
|
||||||
|
char * sPrefix;
|
||||||
char *sFull;
|
char *sFull;
|
||||||
int nFull;
|
int nFull;
|
||||||
|
u8 * view;
|
||||||
case VEC0_METADATA_OPERATOR_EQ: {
|
case VEC0_METADATA_OPERATOR_EQ: {
|
||||||
for(int i = 0; i < size; i++) {
|
for(int i = 0; i < size; i++) {
|
||||||
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
|
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
|
||||||
int nPrefix = ((int*) view)[0];
|
nPrefix = ((int*) view)[0];
|
||||||
char * sPrefix = (char *) &view[4];
|
sPrefix = (char *) &view[4];
|
||||||
|
|
||||||
// for EQ the text lengths must match
|
// for EQ the text lengths must match
|
||||||
if(nPrefix != nTarget) {
|
if(nPrefix != nTarget) {
|
||||||
|
|
@ -5925,11 +5928,39 @@ int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void *
|
||||||
}
|
}
|
||||||
case VEC0_METADATA_OPERATOR_NE: {
|
case VEC0_METADATA_OPERATOR_NE: {
|
||||||
for(int i = 0; i < size; i++) {
|
for(int i = 0; i < size; i++) {
|
||||||
u8 * view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
|
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
|
||||||
int n = ((int*) view)[0];
|
nPrefix = ((int*) view)[0];
|
||||||
char * s = (char *) &view[4];
|
sPrefix = (char *) &view[4];
|
||||||
if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {rc = SQLITE_ERROR;goto done;} /* TODO */
|
|
||||||
bitmap_set(b, i, strncmp(s, sTarget, n) != 0);
|
// for NE, if the text lengths don't match, the strings can never be equal
|
||||||
|
if(nPrefix != nTarget) {
|
||||||
|
bitmap_set(b, i, 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
|
||||||
|
|
||||||
|
// for short strings, use the prefix comparison directly
|
||||||
|
if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
|
||||||
|
bitmap_set(b, i, cmpPrefix != 0);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// for NE on long strings, if the prefixes don't match, then the full strings won't either
|
||||||
|
if(cmpPrefix) {
|
||||||
|
bitmap_set(b, i, 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// consult the full string
|
||||||
|
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
|
||||||
|
if(rc != SQLITE_OK) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
if(nPrefix != nFull) {
|
||||||
|
rc = SQLITE_ERROR;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0);
|
||||||
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -783,33 +783,163 @@
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_long_text_knn[ne-bbbb]
|
# name: test_long_text_knn[ne-bbbb]
|
||||||
dict({
|
OrderedDict({
|
||||||
'error': 'OperationalError',
|
'sql': "select * from v where vector match X'11111111' and k = 5 and name != ?",
|
||||||
'message': 'unrecognized token: "!"',
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 6,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccccccccccc_ccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 5,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 4,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbbbbbbbbbb_bbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 2,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'aaaaaaaaaaaa_aaa',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 1,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'aaaa',
|
||||||
|
}),
|
||||||
|
]),
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_long_text_knn[ne-bbbbbbbbbbbb_aaa]
|
# name: test_long_text_knn[ne-bbbbbbbbbbbb_aaa]
|
||||||
dict({
|
OrderedDict({
|
||||||
'error': 'OperationalError',
|
'sql': "select * from v where vector match X'11111111' and k = 5 and name != ?",
|
||||||
'message': 'unrecognized token: "!"',
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 6,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccccccccccc_ccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 5,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 4,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbbbbbbbbbb_bbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 3,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 2,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'aaaaaaaaaaaa_aaa',
|
||||||
|
}),
|
||||||
|
]),
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_long_text_knn[ne-bbbbbbbbbbbb_bbb]
|
# name: test_long_text_knn[ne-bbbbbbbbbbbb_bbb]
|
||||||
dict({
|
OrderedDict({
|
||||||
'error': 'OperationalError',
|
'sql': "select * from v where vector match X'11111111' and k = 5 and name != ?",
|
||||||
'message': 'unrecognized token: "!"',
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 6,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccccccccccc_ccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 5,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 3,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 2,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'aaaaaaaaaaaa_aaa',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 1,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'aaaa',
|
||||||
|
}),
|
||||||
|
]),
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_long_text_knn[ne-bbbbbbbbbbbb_ccc]
|
# name: test_long_text_knn[ne-bbbbbbbbbbbb_ccc]
|
||||||
dict({
|
OrderedDict({
|
||||||
'error': 'OperationalError',
|
'sql': "select * from v where vector match X'11111111' and k = 5 and name != ?",
|
||||||
'message': 'unrecognized token: "!"',
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 6,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccccccccccc_ccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 5,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 4,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbbbbbbbbbb_bbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 3,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 2,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'aaaaaaaaaaaa_aaa',
|
||||||
|
}),
|
||||||
|
]),
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_long_text_knn[ne-longlonglonglonglonglonglong]
|
# name: test_long_text_knn[ne-longlonglonglonglonglonglong]
|
||||||
dict({
|
OrderedDict({
|
||||||
'error': 'OperationalError',
|
'sql': "select * from v where vector match X'11111111' and k = 5 and name != ?",
|
||||||
'message': 'unrecognized token: "!"',
|
'rows': list([
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 6,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccccccccccc_ccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 5,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'cccc',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 4,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbbbbbbbbbb_bbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 3,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'bbbb',
|
||||||
|
}),
|
||||||
|
OrderedDict({
|
||||||
|
'rowid': 2,
|
||||||
|
'vector': b'\x11\x11\x11\x11',
|
||||||
|
'name': 'aaaaaaaaaaaa_aaa',
|
||||||
|
}),
|
||||||
|
]),
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_long_text_updates
|
# name: test_long_text_updates
|
||||||
|
|
|
||||||
|
|
@ -152,7 +152,7 @@ def test_long_text_knn(db, snapshot):
|
||||||
"bbbbbbbbbbbb_ccc",
|
"bbbbbbbbbbbb_ccc",
|
||||||
"longlonglonglonglonglonglong",
|
"longlonglonglonglonglonglong",
|
||||||
]
|
]
|
||||||
ops = ["=", "!-", "<", "<=", ">", ">="]
|
ops = ["=", "!=", "<", "<=", ">", ">="]
|
||||||
op_names = ["eq", "ne", "lt", "le", "gt", "ge"]
|
op_names = ["eq", "ne", "lt", "le", "gt", "ge"]
|
||||||
|
|
||||||
for test in tests:
|
for test in tests:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue