This commit is contained in:
Leon 2023-08-10 12:00:28 -07:00
parent 3934b86af4
commit 2e28ea6927
2 changed files with 37 additions and 1 deletions

View file

@ -8,6 +8,7 @@
import lancedb
import pyarrow as pa
import pandas as pd
import shutil, os
class LanceStore:
@ -24,7 +25,7 @@ class LanceStore:
schema = schema.remove_metadata()
schema = schema.remove(len(schema) - 1)
self.table = self.db.create_table(self.name, schema)
self.table = self.db.create_table(self.name, schema=schema)
def search(self, query, n_results=2, metric="L2", nprobes=20, **kwargs):
# This assumes query is a vector embedding
@ -85,3 +86,10 @@ class LanceStore:
return self.table.delete(f"id = '{_id}'")
else:
return self.table.delete(f"id = {_id}")
def drop(self, name):
    """Drop the table called ``name`` by deleting its on-disk directory.

    The table directory is located at ``<self.db.uri>/<name>.lance``.
    Dropping a table that does not exist is a no-op. This method only
    removes files; it does not modify ``self.table``.

    Args:
        name: Table name, without the ``.lance`` suffix.
    """
    path = os.path.join(self.db.uri, name + '.lance')
    # Attempt the removal directly instead of checking os.path.exists first:
    # a separate existence check can race with another process removing the
    # directory, which would surface as a spurious FileNotFoundError.
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        # Nothing to drop (never created, or already removed).
        pass

View file

@ -0,0 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/9 15:42
@Author : unkn-wn (Leon Yee)
@File : test_lancedb_store.py
"""
from metagpt.document_store.lancedb_store import LanceStore
import random
def test_lance_store():
    """Smoke-test LanceStore: drop, create, write two rows, add one, search."""
    table_name = 'test'
    dim = 100

    def random_vector():
        # One embedding of `dim` uniformly random floats.
        return [random.random() for _ in range(dim)]

    # Opening the store only connects to the database; drop any table left
    # over from an earlier run before creating a fresh one.
    store = LanceStore(table_name)
    store.drop(table_name)
    store.create_table(['vector', 'id', 'meta', 'meta2'])

    # Bulk insert of two documents.
    batch = [random_vector() for _ in range(2)]
    store.write(
        data=batch,
        metadatas=[{"source": "google-docs"}, {"source": "notion"}],
        ids=["doc1", "doc2"],
    )

    # Single-document insert.
    store.add(data=random_vector(), metadatas={"source": "notion"}, ids="doc3")

    query = random_vector()
    result = store.search(query, n_results=3)
    print(result)
    assert len(result) > 0