From 63a3a760e91949a6b856c2241f4d402c38ad755a Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Fri, 8 Mar 2024 11:18:53 +0800 Subject: [PATCH] update rag example --- examples/rag_pipeline.py | 17 +++++++++++------ metagpt/rag/engines/simple.py | 10 +++------- metagpt/rag/factories/index.py | 4 ++-- metagpt/utils/reflection.py | 8 +++----- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/examples/rag_pipeline.py b/examples/rag_pipeline.py index 68b6a3741..83e147dd9 100644 --- a/examples/rag_pipeline.py +++ b/examples/rag_pipeline.py @@ -92,7 +92,7 @@ class RAGExample: self.engine.add_docs([travel_filepath]) await self.rag_pipeline(question=travel_question, print_title=False) - async def rag_add_objs(self): + async def rag_add_objs(self, print_title=True): """This example show how to add objs, before add docs engine retrieve nothing, after add objs engine give the correct answer, will print something like: [Before add objs] Retrieve Result: @@ -104,8 +104,8 @@ class RAGExample: [Object Detail] {'name': 'Mike', 'goal': 'Win The 100-meter Sprint', 'tool': 'Red Bull Energy Drink'} """ - - self._print_title("RAG Add Objs") + if print_title: + self._print_title("RAG Add Objs") player = Player(name="Mike") question = f"{player.rag_key()}" @@ -118,17 +118,22 @@ class RAGExample: nodes = await self._retrieve_and_print(question) print("[Object Detail]") - player: Player = nodes[0].metadata["obj"] - print(player.name) + try: + player: Player = nodes[0].metadata["obj"] + print(player.name) + except Exception as e: + print(f"ERROR: nodes is empty, llm don't answer correctly, exception: {e}") async def rag_ini_objs(self): """This example show how to from objs, will print something like: Same as rag_add_objs """ + self._print_title("RAG Ini Objs") + pre_engine = self.engine self.engine = SimpleEngine.from_objs(retriever_configs=[FAISSRetrieverConfig()]) - await self.rag_add_objs() + await self.rag_add_objs(print_title=False) self.engine = pre_engine async def rag_chromadb(self): diff --git a/metagpt/rag/engines/simple.py b/metagpt/rag/engines/simple.py index 8ada8fc52..cceb9dd03 100644 --- a/metagpt/rag/engines/simple.py +++ b/metagpt/rag/engines/simple.py @@ -170,6 +170,7 @@ class SimpleEngine(RetrieverQueryEngine): documents = SimpleDirectoryReader(input_files=input_files).load_data() self._fix_document_metadata(documents) + nodes = run_transformations(documents, transformations=self.index._transformations) self._save_nodes(nodes) @@ -220,11 +221,6 @@ class SimpleEngine(RetrieverQueryEngine): @staticmethod def _fix_document_metadata(documents: list[Document]): - """LlamaIndex bug, maybe deleted in the near future. - - Metadata in doc has `file_path`, but excluded_embed_metadata_keys is missing. - """ + """LlamaIndex keep metadata['file_path'], which is unnecessary, maybe deleted in the near future.""" for doc in documents: - keys_set = set(doc.excluded_embed_metadata_keys) - keys_set.add("file_path") - doc.excluded_embed_metadata_keys = list(keys_set) + doc.excluded_embed_metadata_keys.append("file_path") diff --git a/metagpt/rag/factories/index.py b/metagpt/rag/factories/index.py index 50b286cdc..eaa2ab97c 100644 --- a/metagpt/rag/factories/index.py +++ b/metagpt/rag/factories/index.py @@ -36,8 +36,8 @@ class RAGIndexFactory(ConfigFactory): def _create_chroma(self, config: ChromaIndexConfig, **kwargs) -> VectorStoreIndex: embed_model = self.extract_embed_model(config, **kwargs) - db2 = chromadb.PersistentClient(str(config.persist_path)) - chroma_collection = db2.get_or_create_collection(config.collection_name) + db = chromadb.PersistentClient(str(config.persist_path)) + chroma_collection = db.get_or_create_collection(config.collection_name) vector_store = ChromaVectorStore(chroma_collection=chroma_collection) index = VectorStoreIndex.from_vector_store( vector_store, diff --git a/metagpt/utils/reflection.py b/metagpt/utils/reflection.py index 887cdf299..8b8237ae7 100644 --- a/metagpt/utils/reflection.py +++ b/metagpt/utils/reflection.py @@ -1,11 +1,9 @@ -""" -class tools, including method inspection, class attributes, inheritance relationships, etc. -""" +"""class tools, including method inspection, class attributes, inheritance relationships, etc.""" def check_methods(C, *methods): - """ - Check if the class has methods. borrow from _collections_abc. + """Check if the class has methods. borrow from _collections_abc. + Useful when implementing implicit interfaces, such as defining an abstract class, isinstance can be used for determination without inheritance. """ mro = C.__mro__