From 513d1317d6282c1869400e841f8a06d10529c7a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Thu, 12 Sep 2024 12:25:32 +0800 Subject: [PATCH] feat: +similarity_search --- metagpt/roles/architect.py | 2 +- metagpt/roles/project_manager.py | 2 +- metagpt/tools/libs/editor.py | 7 ++++--- tests/metagpt/roles/di/test_data_analyst.py | 20 ++++++++++++++++++++ 4 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 tests/metagpt/roles/di/test_data_analyst.py diff --git a/metagpt/roles/architect.py b/metagpt/roles/architect.py index ccce75afa..b990808b3 100644 --- a/metagpt/roles/architect.py +++ b/metagpt/roles/architect.py @@ -41,7 +41,7 @@ class Architect(RoleZero): instruction: str = ARCHITECT_INSTRUCTION max_react_loop: int = 1 # FIXME: Read and edit files requires more steps, consider later tools: list[str] = [ - "Editor:write,read,write_content", + "Editor:write,read,write_content,similarity_search", "RoleZero", "WriteDesign", ] diff --git a/metagpt/roles/project_manager.py b/metagpt/roles/project_manager.py index 228b38660..d64fb90a1 100644 --- a/metagpt/roles/project_manager.py +++ b/metagpt/roles/project_manager.py @@ -31,7 +31,7 @@ class ProjectManager(RoleZero): instruction: str = """Use WriteTasks tool to write a project task list""" max_react_loop: int = 1 # FIXME: Read and edit files requires more steps, consider later - tools: list[str] = ["Editor:write,read,write_content", "RoleZero", "WriteTasks"] + tools: list[str] = ["Editor:write,read,write_content,similarity_search", "RoleZero", "WriteTasks"] def __init__(self, **kwargs) -> None: super().__init__(**kwargs) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index 9ba207bc6..c5c828ee2 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -937,15 +937,16 @@ class Editor(BaseModel): @staticmethod async def similarity_search(query: str, file_or_path: Union[str, Path]) -> List[str]: - """Performs a similarity search for a given query across specified files or paths. + """Given a filename or a pathname, performs a similarity search for a given query across the specified file or path. This method searches the index repository for the provided query, classifying the specified files or paths. It performs a search on each cluster of files and handles non-indexed files separately, merging results from structured indices with any direct results from non-indexed files. + This function call does not depend on other functions. Args: query (str): The search query string to look for in the indexed files. - file_or_path (Union[str, Path]): A path or filename to search within. + file_or_path (Union[str, Path]): A pathname or filename to search within. Returns: List[str]: A list of results as strings, containing the text from the merged results @@ -953,7 +954,7 @@ class Editor(BaseModel): Example: >>> query = "The problem to be analyzed from the document" - >>> file_or_path = "The document or folder you want to query" + >>> file_or_path = "The pathname or filename you want to search within" >>> texts: List[str] = await Editor.similarity_search(query=query, file_or_path=file_or_path) >>> print(texts) """ diff --git a/tests/metagpt/roles/di/test_data_analyst.py b/tests/metagpt/roles/di/test_data_analyst.py new file mode 100644 index 000000000..cf17d078e --- /dev/null +++ b/tests/metagpt/roles/di/test_data_analyst.py @@ -0,0 +1,20 @@ +import pytest + +from metagpt.const import TEST_DATA_PATH +from metagpt.roles.di.data_analyst import DataAnalyst + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("query", "filename"), [("similarity search about '有哪些需求描述?' in document ", TEST_DATA_PATH / "requirements/2.pdf")] +) +async def test_similarity_search(query, filename): + di = DataAnalyst() + query += f"'{str(filename)}'" + + rsp = await di.run(query) + assert rsp + + +if __name__ == "__main__": + pytest.main([__file__, "-s"])