From 85c1d0799025c22a74a19668ef4b7d4baafbec70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Fri, 6 Sep 2024 18:31:11 +0800 Subject: [PATCH] refactor: cross_repo_search --- metagpt/tools/libs/editor.py | 37 +------------------ metagpt/tools/libs/index_repo.py | 63 +++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 40 deletions(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index 89a3f70cc..29e04b56f 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -3,7 +3,6 @@ This file is borrowed from OpenDevin You can find the original repository here: https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py """ -import asyncio import os import re import shutil @@ -15,10 +14,9 @@ from pydantic import BaseModel, ConfigDict from metagpt.const import DEFAULT_WORKSPACE_ROOT from metagpt.logs import logger -from metagpt.tools.libs.index_repo import OTHER_TYPE, IndexRepo +from metagpt.tools.libs.index_repo import IndexRepo from metagpt.tools.libs.linter import Linter from metagpt.tools.tool_registry import register_tool -from metagpt.utils.common import list_files from metagpt.utils.file import File from metagpt.utils.report import EditorReporter @@ -882,35 +880,4 @@ class Editor(BaseModel): List[str]: A list of search results as strings, containing the text from the merged results and any direct results from other files. """ - if not file_or_path or not Path(file_or_path).exists(): - raise ValueError(f'"{str(file_or_path)}" not exists') - files = [file_or_path] if not Path(file_or_path).is_dir() else list_files(file_or_path) - clusters, roots = IndexRepo.classify_path(files) - futures = [] - others = set() - for persist_path, filenames in clusters.items(): - if persist_path == OTHER_TYPE: - others.update(filenames) - continue - root = roots[persist_path] - repo = IndexRepo(persist_path=persist_path, root_path=root) - futures.append(repo.search(query=query, filenames=list(filenames))) - - for i in others: - futures.append(File.read_text_file(i)) - - futures_results = [] - if futures: - futures_results = await asyncio.gather(*futures) - - result = [] - v_result = [] - for i in futures_results: - if isinstance(i, str): - result.append(i) - else: - v_result.append(i) - - repo = IndexRepo() - merged = await repo.merge(query=query, indices_list=v_result) - return [i.text for i in merged] + result + return await IndexRepo.cross_repo_search(query=query, file_or_path=file_or_path) diff --git a/metagpt/tools/libs/index_repo.py b/metagpt/tools/libs/index_repo.py index 7de6bce5e..24065c4be 100644 --- a/metagpt/tools/libs/index_repo.py +++ b/metagpt/tools/libs/index_repo.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - +import asyncio import json import re from pathlib import Path @@ -295,16 +295,16 @@ class IndexRepo(BaseModel): return old_fp != fp @staticmethod - def classify_path(files: List[Union[str, Path]]) -> Tuple[Dict[str, Set[Path]], Dict[str, str]]: - """Classify a list of file paths or Path objects into different categories. + def find_index_repo_path(files: List[Union[str, Path]]) -> Tuple[Dict[str, Set[Path]], Dict[str, str]]: + """Map the file path to the corresponding index repo. Args: files (List[Union[str, Path]]): A list of file paths or Path objects to be classified. Returns: Tuple[Dict[str, Set[Path]], Dict[str, str]]: - - A dictionary mapping the classified path types to sets of corresponding Path objects. - - A dictionary mapping the classified path types to their corresponding root directories. + - A dictionary mapping the index repo path to the files. + - A dictionary mapping the index repo path to their corresponding root directories. """ mappings = { UPLOADS_INDEX_ROOT: re.compile(r"^/data/uploads($|/.*)"), @@ -351,3 +351,56 @@ class IndexRepo(BaseModel): except Exception as e: logger.warning(f"Load meta error: {e}") return default_meta + + @staticmethod + async def cross_repo_search(query: str, file_or_path: Union[str, Path]) -> List[str]: + """Search for a query across multiple repositories. + + This asynchronous function searches for the specified query in files + located at the given path or file. + + Args: + query (str): The search term to look for in the files. + file_or_path (Union[str, Path]): The path to the file or directory + where the search should be conducted. This can be a string path + or a Path object. + + Returns: + List[str]: A list of strings containing the paths of files that + contain the query results. + + Raises: + ValueError: If the query string is empty. + """ + if not file_or_path or not Path(file_or_path).exists(): + raise ValueError(f'"{str(file_or_path)}" not exists') + files = [file_or_path] if not Path(file_or_path).is_dir() else list_files(file_or_path) + clusters, roots = IndexRepo.find_index_repo_path(files) + futures = [] + others = set() + for persist_path, filenames in clusters.items(): + if persist_path == OTHER_TYPE: + others.update(filenames) + continue + root = roots[persist_path] + repo = IndexRepo(persist_path=persist_path, root_path=root) + futures.append(repo.search(query=query, filenames=list(filenames))) + + for i in others: + futures.append(File.read_text_file(i)) + + futures_results = [] + if futures: + futures_results = await asyncio.gather(*futures) + + result = [] + v_result = [] + for i in futures_results: + if isinstance(i, str): + result.append(i) + else: + v_result.append(i) + + repo = IndexRepo() + merged = await repo.merge(query=query, indices_list=v_result) + return [i.text for i in merged] + result