diff --git a/.gitignore b/.gitignore index 1613a638d..2c59f3b59 100644 --- a/.gitignore +++ b/.gitignore @@ -171,3 +171,4 @@ tests/metagpt/utils/file_repo_git *.png htmlcov htmlcov.* +*.pkl diff --git a/examples/faq.xlsx b/examples/example.xlsx similarity index 100% rename from examples/faq.xlsx rename to examples/example.xlsx diff --git a/metagpt/document.py b/metagpt/document.py index dcbd19d4d..f4fa0a489 100644 --- a/metagpt/document.py +++ b/metagpt/document.py @@ -101,6 +101,7 @@ class Document(BaseModel): raise ValueError("File path is not set.") self.path.parent.mkdir(parents=True, exist_ok=True) + # TODO: excel, csv, json, etc. self.path.write_text(self.content, encoding="utf-8") def persist(self): @@ -126,10 +127,12 @@ class IndexableDocument(Document): if not data_path.exists(): raise FileNotFoundError(f"File {data_path} not found.") data = read_data(data_path) - content = data_path.read_text() if isinstance(data, pd.DataFrame): validate_cols(content_col, data) - return cls(data=data, content=content, content_col=content_col, meta_col=meta_col) + return cls(data=data, content=str(data), content_col=content_col, meta_col=meta_col) + else: + content = data_path.read_text() + return cls(data=data, content=content, content_col=content_col, meta_col=meta_col) def _get_docs_and_metadatas_by_df(self) -> (list, list): df = self.data diff --git a/metagpt/document_store/faiss_store.py b/metagpt/document_store/faiss_store.py index bfba1d386..1271f1c23 100644 --- a/metagpt/document_store/faiss_store.py +++ b/metagpt/document_store/faiss_store.py @@ -14,7 +14,6 @@ from langchain.vectorstores import FAISS from langchain_core.embeddings import Embeddings from metagpt.config import CONFIG -from metagpt.const import DATA_PATH from metagpt.document import IndexableDocument from metagpt.document_store.base_store import LocalStore from metagpt.logs import logger @@ -76,10 +75,3 @@ class FaissStore(LocalStore): def delete(self, *args, **kwargs): """Currently, langchain does not provide a delete interface.""" raise NotImplementedError - - -if __name__ == "__main__": - faiss_store = FaissStore(DATA_PATH / "qcs/qcs_4w.json") - logger.info(faiss_store.search("Oily Skin Facial Cleanser")) - faiss_store.add([f"Oily Skin Facial Cleanser-{i}" for i in range(3)]) - logger.info(faiss_store.search("Oily Skin Facial Cleanser")) diff --git a/tests/metagpt/document_store/test_faiss_store.py b/tests/metagpt/document_store/test_faiss_store.py index 75bb5427f..7e2979bd4 100644 --- a/tests/metagpt/document_store/test_faiss_store.py +++ b/tests/metagpt/document_store/test_faiss_store.py @@ -30,3 +30,11 @@ async def test_search_xlsx(): query = "Which facial cleanser is good for oily skin?" result = await role.run(query) logger.info(result) + + +@pytest.mark.asyncio +async def test_write(): + store = FaissStore(EXAMPLE_PATH / "example.xlsx", meta_col="Answer", content_col="Question") + _faiss_store = store.write() + assert _faiss_store.docstore + assert _faiss_store.index