mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 16:56:27 +02:00
long text support
This commit is contained in:
parent
f12be5292b
commit
8132f26f3b
3 changed files with 244 additions and 15 deletions
89
sqlite-vec.c
89
sqlite-vec.c
|
|
@ -3419,6 +3419,7 @@ static sqlite3_module vec_npy_eachModule = {
|
|||
#define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\""
|
||||
|
||||
#define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadata_chunks%02d\""
|
||||
#define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadata_text_data_%02d\""
|
||||
|
||||
#define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
|
||||
#define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new"
|
||||
|
|
@ -4094,8 +4095,23 @@ int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_i
|
|||
sqlite3_result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "TODO: handle longer strings in result_metadata_value\n");
|
||||
abort();
|
||||
sqlite3_stmt * stmt;
|
||||
const char * zSql = sqlite3_mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
|
||||
if(!zSql) {
|
||||
abort();
|
||||
}
|
||||
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
|
||||
if(rc != SQLITE_OK) {
|
||||
abort();
|
||||
}
|
||||
sqlite3_bind_int64(stmt, 1, rowid);
|
||||
rc = sqlite3_step(stmt);
|
||||
if(rc != SQLITE_ROW) {
|
||||
abort();
|
||||
}
|
||||
sqlite3_result_value(context, sqlite3_column_value(stmt, 0));
|
||||
sqlite3_finalize(stmt);
|
||||
rc = SQLITE_OK;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -5032,6 +5048,25 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
|||
goto error;
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
|
||||
if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
|
||||
char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME "(rowid PRIMARY KEY, data TEXT);",
|
||||
pNew->schemaName, pNew->tableName, i);
|
||||
if (!zSql) {
|
||||
goto error;
|
||||
}
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
|
||||
sqlite3_free((void *)zSql);
|
||||
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
|
||||
sqlite3_finalize(stmt);
|
||||
*pzErr = sqlite3_mprintf(
|
||||
"Could not create '_metadata_text_data_%02d' shadow table: %s", i,
|
||||
sqlite3_errmsg(db));
|
||||
goto error;
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if(pNew->numAuxiliaryColumns > 0) {
|
||||
|
|
@ -5149,6 +5184,17 @@ static int vec0Destroy(sqlite3_vtab *pVtab) {
|
|||
goto done;
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
|
||||
if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
|
||||
zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME, p->schemaName,p->tableName, i);
|
||||
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
|
||||
sqlite3_free((void *)zSql);
|
||||
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
|
||||
rc = SQLITE_ERROR;
|
||||
goto done;
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
stmt = NULL;
|
||||
|
|
@ -7305,7 +7351,7 @@ cleanup:
|
|||
* @param chunk_offset offset the row/metadata value is assigned to
|
||||
* @return int
|
||||
*/
|
||||
int vec0_insert_metadata_values(vec0_vtab *p, int argc, sqlite3_value ** argv, i64 chunk_id, i64 chunk_offset) {
|
||||
int vec0_insert_metadata_values(vec0_vtab *p, int argc, sqlite3_value ** argv, i64 chunk_id, i64 chunk_offset, i64 rowid) {
|
||||
int rc;
|
||||
for(int i = 0; i < vec0_num_defined_user_columns(p); i++) {
|
||||
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
|
||||
|
|
@ -7389,21 +7435,34 @@ int vec0_insert_metadata_values(vec0_vtab *p, int argc, sqlite3_value ** argv, i
|
|||
case VEC0_METADATA_COLUMN_KIND_TEXT: {
|
||||
char * s = sqlite3_value_text(v);
|
||||
int n = sqlite3_value_bytes(v);
|
||||
if(n <= 12) {
|
||||
u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
|
||||
memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
|
||||
memcpy(view, &n, sizeof(int));
|
||||
memcpy(view+4, s, n);
|
||||
u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
|
||||
memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
|
||||
memcpy(view, &n, sizeof(int));
|
||||
memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4));
|
||||
|
||||
rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "TODO handle longer strings");
|
||||
abort();
|
||||
rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
|
||||
if(n > 12) {
|
||||
const char * zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid, data) VALUES (?, ?)", p->schemaName, p->tableName, metadata_idx);
|
||||
if(!zSql) {
|
||||
abort();
|
||||
}
|
||||
sqlite3_stmt * stmt;
|
||||
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
|
||||
if(rc != SQLITE_OK) {
|
||||
abort();
|
||||
}
|
||||
sqlite3_bind_int64(stmt, 1, rowid);
|
||||
sqlite3_bind_text(stmt, 2, s, n, SQLITE_STATIC);
|
||||
rc = sqlite3_step(stmt);
|
||||
if(rc != SQLITE_DONE) {
|
||||
abort();
|
||||
}
|
||||
sqlite3_finalize(stmt);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
printf("rc=%d\n", rc);
|
||||
if(rc != SQLITE_OK) {
|
||||
|
||||
}
|
||||
|
|
@ -7631,7 +7690,7 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
|
|||
goto cleanup;
|
||||
}
|
||||
|
||||
rc = vec0_insert_metadata_values(p, argc, argv, chunk_rowid, chunk_offset);
|
||||
rc = vec0_insert_metadata_values(p, argc, argv, chunk_rowid, chunk_offset, rowid);
|
||||
if(rc != SQLITE_OK) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
|
@ -7993,7 +8052,7 @@ int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO handle metadata
|
||||
// TODO handle metadata updates
|
||||
|
||||
// 4) iterate over all new vectors, update the vectors
|
||||
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
|
||||
|
|
|
|||
|
|
@ -251,6 +251,13 @@
|
|||
'rootpage': 7,
|
||||
'sql': 'CREATE TABLE "v_metadata_chunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_metadata_text_data_00',
|
||||
'tbl_name': 'v_metadata_text_data_00',
|
||||
'rootpage': 9,
|
||||
'sql': 'CREATE TABLE "v_metadata_text_data_00"(rowid PRIMARY KEY, data TEXT)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_rowids',
|
||||
|
|
@ -268,6 +275,111 @@
|
|||
]),
|
||||
})
|
||||
# ---
|
||||
# name: test_long_text
|
||||
dict({
|
||||
'v_chunks': OrderedDict({
|
||||
'sql': 'select * from v_chunks',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'v_metadata_chunks00': OrderedDict({
|
||||
'sql': 'select * from v_metadata_chunks00',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'v_metadata_text_data_00': OrderedDict({
|
||||
'sql': 'select * from v_metadata_text_data_00',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'v_rowids': OrderedDict({
|
||||
'sql': 'select * from v_rowids',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'v_vector_chunks00': OrderedDict({
|
||||
'sql': 'select * from v_vector_chunks00',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
})
|
||||
# ---
|
||||
# name: test_long_text.1
|
||||
OrderedDict({
|
||||
'sql': 'select * from v',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'vector': b'\x11\x11\x11\x11',
|
||||
'name': '123456789a12',
|
||||
}),
|
||||
OrderedDict({
|
||||
'rowid': 2,
|
||||
'vector': b'\x11\x11\x11\x11',
|
||||
'name': '123456789a123',
|
||||
}),
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
# name: test_long_text.2
|
||||
dict({
|
||||
'v_chunks': OrderedDict({
|
||||
'sql': 'select * from v_chunks',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'chunk_id': 1,
|
||||
'size': 8,
|
||||
'validity': b'\x03',
|
||||
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
'v_metadata_chunks00': OrderedDict({
|
||||
'sql': 'select * from v_metadata_chunks00',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'data': b'\x0c\x00\x00\x00123456789a12\r\x00\x00\x00123456789a12\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
'v_metadata_text_data_00': OrderedDict({
|
||||
'sql': 'select * from v_metadata_text_data_00',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 2,
|
||||
'data': '123456789a123',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
'v_rowids': OrderedDict({
|
||||
'sql': 'select * from v_rowids',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'id': None,
|
||||
'chunk_id': 1,
|
||||
'chunk_offset': 0,
|
||||
}),
|
||||
OrderedDict({
|
||||
'rowid': 2,
|
||||
'id': None,
|
||||
'chunk_id': 1,
|
||||
'chunk_offset': 1,
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
'v_vector_chunks00': OrderedDict({
|
||||
'sql': 'select * from v_vector_chunks00',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'vectors': b'\x11\x11\x11\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
})
|
||||
# ---
|
||||
# name: test_normal.1
|
||||
dict({
|
||||
'v_chunks': OrderedDict({
|
||||
|
|
@ -295,6 +407,11 @@
|
|||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'v_metadata_text_data_03': OrderedDict({
|
||||
'sql': 'select * from v_metadata_text_data_03',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'v_rowids': OrderedDict({
|
||||
'sql': 'select * from v_rowids',
|
||||
'rows': list([
|
||||
|
|
@ -408,6 +525,11 @@
|
|||
}),
|
||||
]),
|
||||
}),
|
||||
'v_metadata_text_data_03': OrderedDict({
|
||||
'sql': 'select * from v_metadata_text_data_03',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'v_rowids': OrderedDict({
|
||||
'sql': 'select * from v_rowids',
|
||||
'rows': list([
|
||||
|
|
@ -442,6 +564,27 @@
|
|||
}),
|
||||
})
|
||||
# ---
|
||||
# name: test_normal.7
|
||||
OrderedDict({
|
||||
'sql': 'drop table v',
|
||||
'rows': list([
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
# name: test_normal.8
|
||||
OrderedDict({
|
||||
'sql': 'select * from sqlite_master',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'sqlite_sequence',
|
||||
'tbl_name': 'sqlite_sequence',
|
||||
'rootpage': 3,
|
||||
'sql': 'CREATE TABLE sqlite_sequence(name,seq)',
|
||||
}),
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
# name: test_normal[sqlite_master]
|
||||
OrderedDict({
|
||||
'sql': "select * from sqlite_master where type = 'table' order by name",
|
||||
|
|
@ -495,6 +638,13 @@
|
|||
'rootpage': 13,
|
||||
'sql': 'CREATE TABLE "v_metadata_chunks03"(rowid PRIMARY KEY, data BLOB NOT NULL)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_metadata_text_data_03',
|
||||
'tbl_name': 'v_metadata_text_data_03',
|
||||
'rootpage': 15,
|
||||
'sql': 'CREATE TABLE "v_metadata_text_data_03"(rowid PRIMARY KEY, data TEXT)',
|
||||
}),
|
||||
OrderedDict({
|
||||
'type': 'table',
|
||||
'name': 'v_rowids',
|
||||
|
|
@ -732,6 +882,11 @@
|
|||
}),
|
||||
]),
|
||||
}),
|
||||
'vec_movies_metadata_text_data_01': OrderedDict({
|
||||
'sql': 'select * from vec_movies_metadata_text_data_01',
|
||||
'rows': list([
|
||||
]),
|
||||
}),
|
||||
'vec_movies_rowids': OrderedDict({
|
||||
'sql': 'select * from vec_movies_rowids',
|
||||
'rows': list([
|
||||
|
|
|
|||
|
|
@ -33,6 +33,9 @@ def test_normal(db, snapshot):
|
|||
assert exec(db, "select * from v") == snapshot()
|
||||
assert vec0_shadow_table_contents(db, "v") == snapshot()
|
||||
|
||||
assert exec(db, "drop table v") == snapshot()
|
||||
assert exec(db, "select * from sqlite_master") == snapshot()
|
||||
|
||||
|
||||
#
|
||||
# assert exec(db, "select * from v") == snapshot()
|
||||
|
|
@ -44,6 +47,18 @@ def test_normal(db, snapshot):
|
|||
# )
|
||||
|
||||
|
||||
def test_long_text(db, snapshot):
|
||||
db.execute(
|
||||
"create virtual table v using vec0(vector float[1], name text, chunk_size=8)"
|
||||
)
|
||||
assert vec0_shadow_table_contents(db, "v") == snapshot()
|
||||
INSERT = "insert into v(vector, name) values (?, ?)"
|
||||
assert exec(db, INSERT, [b"\x11\x11\x11\x11", "123456789a12"])
|
||||
assert exec(db, INSERT, [b"\x11\x11\x11\x11", "123456789a123"])
|
||||
assert exec(db, "select * from v") == snapshot()
|
||||
assert vec0_shadow_table_contents(db, "v") == snapshot()
|
||||
|
||||
|
||||
def test_types(db, snapshot):
|
||||
db.execute(
|
||||
"create virtual table v using vec0(vector float[1], b boolean, n int, f float, t text, chunk_size=8)"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue