fix conflict

2026-07-17 16:41:05 +02:00 · 2024-09-09 11:01:30 +08:00 · 2024-09-09 11:01:30 +08:00 · 1797fdc1f8
commit 1797fdc1f8
parent 53ef7be68c fdf2a0edf6
15 changed files with 952 additions and 246 deletions
--- a/metagpt/environment/mgx/mgx_env.py
+++ b/metagpt/environment/mgx/mgx_env.py
@ -96,7 +96,7 @@ class MGXEnv(Environment, SerializationMixin):

    async def reply_to_human(self, content: str, sent_from: Role = None) -> str:
        # NOTE: Can be overwritten in remote setting
-        return "SUCCESS, human has received your reply. Refrain from resending duplicate messages."
+        return "SUCCESS, human has received your reply. Refrain from resending duplicate messages.  If you no longer need to take action, use the command ‘end’ to stop."

    def message_within_software_sop(self, message: Message) -> bool:
        # Engineer, QaEngineer can be end of the SOP. Their msg requires routing outside.
--- a/metagpt/prompts/di/engineer2.py
+++ b/metagpt/prompts/di/engineer2.py
@ -1,32 +1,89 @@
 from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION

-EXTRA_INSTRUCTION_DEPRECATED = """
-4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand.
-5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response.
-6. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path.
-7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.
-8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.
-9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.
-10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.
-11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.
-12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write.
-13. When a system design or project schedule is provided, at the end of the plan, add a Validate Task for each file; for example, if there are three files, add three Validate Tasks. For each Validate Task, just call ValidateAndRewriteCode.run.
-14. When planning, initially list the files for coding, then outline all coding and review tasks in your first response.
-15. Note 'Task for {file_name} completed.' — signifies the {file_name} coding task is done.
-16. Avoid re-reviewing or re-coding the same code. When you decide to take a write or review action, include the command 'finish current task' in the same response.
-17. When coding JavaScript, avoid using '\'' in strings.
-18. If you plan to read a file, do not include other plans in the same response.
-"""
-
 EXTRA_INSTRUCTION = """
-6. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path.
-7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.
-8. When planning, initially list the files for coding, then outline all coding and review tasks in your first response.
-9. If you plan to read a file, do not include other plans in the same response.
-10. Use Engineer2.write_new_code to create or modify a file. Write only one code file each time.
-11. When the requirement is simple, you don't need to create a plan, just do it right away.
-"""
+You are an autonomous programmer

+The special interface consists of a file editor that shows you 100 lines of a file at a time.
+
+You can use terminal commands (e.g., cat, ls, cd) by calling Terminal.run_command.
+
+
+You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors.
+
+In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix.
+
+Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. If the repository doesn't exist, please download it and then navigate to it.
+All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time.
+
+Note:
+
+1. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the Editor.goto_line command. It's much quicker. 
+2. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file.
+3. When using Editor.edit_file_by_replace, if there is no exact match, take the difference in indentation into consideration.
+4. After editing, verify the changes to ensure correct line numbers and proper indentation. Adhere to PEP8 standards for Python code.
+5. NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! Ensure the code adheres to PEP8 standards. If an edit command fails, you can try to edit the file again to correct the indentation, but don't repeat the same command without changes.
+6. To avoid syntax errors when editing files multiple times, consider opening the file to view the surrounding code related to the error line and make modifications based on this context.
+7. Ensure to observe the currently open file and the current working directory, which is displayed right after the open file. The open file might be in a different directory than the working directory. Remember, commands like 'create' open files and might alter the current open file.
+8. Use search commands (`search_dir`, `search_file`, `find_file`) and navigation commands (`open_file`, `goto_line`) to locate and modify files efficiently. The Editor tool can fully satisfy the requirements. Follow these steps and considerations for optimal results:
+    **General Search Guidelines:**
+    - Ensure you are in the repository's root directory before starting your search.
+    - Always double-check the current working directory and the currently open file to avoid confusion.
+    - Avoid repeating failed search commands without modifications to improve efficiency.
+
+    **Strategies for Searching and Navigating Files:**
+
+    1. **If you know the file's location:**
+       - Use the `open_file` command directly to open the file.
+       - Use `search_file` to find the `search_term` within the currently open file.
+       - Alternatively, use the `goto_line` command to jump to the specified line.
+       - **Boundary Consideration:** Ensure the file path is correctly specified and accessible.
+
+    2. **If you know the filename but not the exact location:**
+       - Use `find_file` to locate the file in the directory.
+       - Use `open_file` to open the file once located.
+       - Use `search_file` to find the `search_term` within the file.
+       - Use `goto_line` to jump to the specified line if needed.
+       - **Boundary Consideration:** Handle cases where the file may exist in multiple directories by verifying the correct path before opening.
+
+    3. **If you know the symbol but not the file's location:**
+       - Use "search_dir" to find files containing the symbol within the directory.
+       - Review the search results to identify the relevant file(s).
+       - Use `open_file` to open the identified file.
+       - Use `search_file` to locate the `search_term` within the open file.
+       - Use `goto_line` to jump to the specified line.
+       - **Boundary Consideration:** Be thorough in reviewing multiple search results to ensure you open the correct file. Consider using more specific search terms if initial searches return too many results.
+
+    **Search Tips:**
+    - The `<search_term>` for `search_dir`, `find_file`, or `search_file` should be an existing class name, function name, or file name.
+    - Enclose terms like `def` or `class` in quotes when searching for functions or classes (e.g., `search_dir 'def apow'` or `search_file 'class Pow'`).
+    - Use wildcard characters (`*`, `?`) in search terms to broaden or narrow down your search scope.
+    - If search commands return too many results, refine your search criteria or use more specific terms.
+    - If a search command fails, modify the search criteria, check for search_term or  paths, and then try again.
+    - Use the feedback from observations or Terminal command output in the trajectory to guide adjustments in your search strategy.
+
+9. When the edit fails, try to enlarge the range of code.
+10. You must use the Editor.open_file command to open a file before using the Editor tool's edit command to modify it. When you open a file, any currently open file will be automatically closed.
+11. Remember, when you use Editor.insert_content_at_line or Editor.edit_file_by_replace, the line numbers will change after the operation. Therefore, if there are multiple operations, perform only the first operation in the current response, and defer the subsequent operations to the next turn.
+11.1 Do not use Editor.insert_content_at_line or Editor.edit_file_by_replace more than once per command list.
+12. If you choose Editor.insert_content_at_line, you must ensure that there is no duplication between the inserted content and the original code. If there is overlap between the new code and the original code, use Editor.edit_file_by_replace instead.
+13. If you choose Editor.edit_file_by_replace, the original code that needs to be replaced must start at the beginning of the line and end at the end of the line
+
+14. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path.
+15. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.
+16. When planning, initially list the files for coding, then outline all coding tasks based on the file organization in your first response.
+17. If you plan to read a file, do not include other plans in the same response.
+18. Write only one code file each time and provide its full implementation.
+19. When the requirement is simple, you don't need to create a plan, just do it right away.
+20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code.
+21. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
+"""
+CURRENT_STATE = """
+The current editor state is:
+(Editor current directory: {editor_current_directory})
+(Editor open file: {editor_open_file})
+The current terminal state is:
+(Terminal current directory: {terminal_current_directory})
+"""
 ENGINEER2_INSTRUCTION = ROLE_INSTRUCTION + EXTRA_INSTRUCTION.strip()

 WRITE_CODE_SYSTEM_PROMPT = """
@ -35,7 +92,7 @@ You are a world-class engineer, your goal is to write google-style, elegant, mod
 Pay attention to the conversation history and the following constraints:
 1. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.
 2. When modifying a code, rewrite the full code instead of updating or inserting a snippet.
-3. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.
+3. Write out EVERY CODE DETAIL, DON'T LEAVE TODO OR PLACEHOLDER.
 """

 WRITE_CODE_PROMPT = """
--- a/metagpt/prompts/di/role_zero.py
+++ b/metagpt/prompts/di/role_zero.py
@ -71,6 +71,7 @@ Pay close attention to the Example provided, you can reuse the example for your
 You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially.
 If you finish current task, you will automatically take the next task in the existing plan, use Plan.finish_task, DON'T append a new task.
 Review the latest plan's outcome, focusing on achievements. If your completed task matches the current, consider it finished.
+Using Editor.insert_content_at_line and Editor.edit_file_by_replace more than once in the current command list is forbidden, because these commands are mutually exclusive and change the line numbers after execution.
 In your response, include at least one command.

 # Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command:
@ -103,6 +104,7 @@ Fifth, describe if you should terminate, you should use **end** command to termi
 REGENERATE_PROMPT = """
 Review and reflect on the history carefully, provide a different response.
 Describe if you should terminate using **end** command, or use **RoleZero.ask_human** to ask human for help, or try a different approach and output different commands. You are NOT allowed to provide the same commands again.
+You should use "end" to stop when all tasks have been completed and the requirements are satisfied.
 Your reflection, then the commands in a json array:
 """
 ASK_HUMAN_COMMAND = """
--- a/metagpt/prompts/di/team_leader.py
+++ b/metagpt/prompts/di/team_leader.py
@ -24,7 +24,6 @@ Note:
 - XL: Social media platform, e-commerce app, real-time multiplayer game
 - For XS and S requirements, you don't need the standard software development process, you may directly ask Engineer to write the code. Otherwise, estimate if any part of the standard software development process may contribute to a better final code. If so, assign team members accordingly.
 3.1 If the task involves code review (CR) or code checking, you should assign it to Engineer.
-3.2. If the requirement is to fix a bug or issue, you should assign it to Issue Solver. However, if the code is written by Engineer, Engineer must maintain the code.
 4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.
 5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.
 6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.
@ -43,7 +42,6 @@ Sixth, describe the requirements as they pertain to software development, data a
 Seventh, describe the technologies you must use.  
 """
 )
-
 TL_INFO = """
 {role_info}
 Your team member:
--- a/metagpt/roles/di/engineer2.py
+++ b/metagpt/roles/di/engineer2.py
@ -4,8 +4,11 @@ from pathlib import Path

 from pydantic import Field

+from metagpt.logs import logger
+
 # from metagpt.actions.write_code_review import ValidateAndRewriteCode
 from metagpt.prompts.di.engineer2 import (
+    CURRENT_STATE,
    ENGINEER2_INSTRUCTION,
    WRITE_CODE_PROMPT,
    WRITE_CODE_SYSTEM_PROMPT,
@ -14,6 +17,7 @@ from metagpt.roles.di.role_zero import RoleZero
 from metagpt.schema import UserMessage
 from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE
 from metagpt.tools.libs.cr import CodeReview
+from metagpt.tools.libs.git import git_create_pull
 from metagpt.tools.libs.terminal import Terminal
 from metagpt.tools.tool_registry import register_tool
 from metagpt.utils.common import CodeParser, awrite
@ -26,24 +30,69 @@ class Engineer2(RoleZero):
    profile: str = "Engineer"
    goal: str = "Take on game, app, and web development."
    instruction: str = ENGINEER2_INSTRUCTION
-
    terminal: Terminal = Field(default_factory=Terminal, exclude=True)

-    tools: list[str] = ["Plan", "Editor:read", "RoleZero", "Terminal:run_command", "Engineer2", "SearchEnhancedQA", "CodeReview"]
+    tools: list[str] = [
+        "Plan",
+        "Editor",
+        "RoleZero",
+        "Terminal:run_command",
+        "Browser:goto,scroll",
+        "git_create_pull",
+        "SearchEnhancedQA",
+        "Engineer2",
+        "CodeReview",
+    ]
+    # SWE Agent parameter
+    run_eval: bool = False
+    output_diff: str = ""
+    max_react_loop: int = 40
+
+    async def _think(self) -> bool:
+        await self._format_instruction()
+        res = await super()._think()
+        return res
+
+    async def _format_instruction(self):
+        """
+        Display the current terminal and editor state.
+        This information will be dynamically added to the command prompt.
+        """
+        state = {
+            "editor_open_file": self.editor.current_file,
+            "editor_current_directory": self.editor.working_dir,
+            "terminal_current_directory": await self.terminal.run_command("pwd"),
+        }
+        self.cmd_prompt_current_state = CURRENT_STATE.format(**state).strip()

    def _update_tool_execution(self):
        # validate = ValidateAndRewriteCode()
        cr = CodeReview()
-        self.tool_execution_map.update(
-            {
-                "Terminal.run_command": self.terminal.run_command,
-                "Engineer2.write_new_code": self.write_new_code,
-                "CodeReview.review": cr.review,
-                "CodeReview.fix": cr.fix,
-                # "ValidateAndRewriteCode.run": validate.run,
-                # "ValidateAndRewriteCode": validate.run,
-            }
-        )
+        self.exclusive_tool_commands.append("Engineer2.write_new_code")
+        if self.run_eval is True:
+            # Evalute tool map
+            self.tool_execution_map.update(
+                {
+                    "git_create_pull": git_create_pull,
+                    "Engineer2.write_new_code": self.write_new_code,
+                    "CodeReview.review": cr.review,
+                    "CodeReview.fix": cr.fix,
+                    "Terminal.run_command": self._eval_terminal_run,
+                    "RoleZero.ask_human": self._end,
+                    "RoleZero.reply_to_human": self._end,
+                }
+            )
+        else:
+            # Default tool map
+            self.tool_execution_map.update(
+                {
+                    "git_create_pull": git_create_pull,
+                    "Engineer2.write_new_code": self.write_new_code,
+                    "CodeReview.review": cr.review,
+                    "CodeReview.fix": cr.fix,
+                    "Terminal.run_command": self.terminal.run_command,
+                }
+            )

    def _retrieve_experience(self) -> str:
        return ENGINEER_EXAMPLE
@ -82,3 +131,14 @@ class Engineer2(RoleZero):

        # TODO: Consider adding line no to be ready for editing.
        return f"The file {path} has been successfully created, with content:\n{code}"
+
+    async def _eval_terminal_run(self, cmd):
+        """Run `cmd` in the terminal, unless it contains pull/push/commit, which ends the evaluation run instead."""
+        if not any(key_word in cmd for key_word in ("pull", "push", "commit")):
+            return await self.terminal.run_command(cmd)
+        # Engineer2 tries to submit the repository once the bug is fixed, which marks the end of the fixing process.
+        logger.info(f"Engineer2 use cmd:{cmd}\nCurrent test case is finished.")
+        # Set self.rc.todo to None to stop the engineer.
+        self._set_state(-1)
+        # Return an explicit message: `command_output` used to be unbound on this branch (UnboundLocalError).
+        return "Current test case is finished."
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@ -75,8 +75,15 @@ class RoleZero(Role):
    tool_recommender: Optional[ToolRecommender] = None
    tool_execution_map: Annotated[dict[str, Callable], Field(exclude=True)] = {}
    special_tool_commands: list[str] = ["Plan.finish_current_task", "end", "Bash.run"]
+    # List of exclusive tool commands.
+    # If multiple instances of these commands appear, only the first occurrence will be retained.
+    exclusive_tool_commands: list[str] = [
+        "Editor.edit_file_by_replace",
+        "Editor.insert_content_at_line",
+        "Editor.append_file",
+    ]
    # Equipped with three basic tools by default for optional use
-    editor: Editor = Editor()
+    editor: Editor = Editor(enable_auto_lint=True)
    browser: Browser = Browser()

    # Experience
@ -152,7 +159,7 @@ class RoleZero(Role):
                    "scroll_up",
                    "search_dir",
                    "search_file",
-                    "set_workdir",
+                    # "set_workdir",
                    "write",
                ]
            }
@ -233,10 +240,8 @@ class RoleZero(Role):
        async with ThoughtReporter(enable_llm_stream=True) as reporter:
            await reporter.async_report({"type": "react"})
            self.command_rsp = await self.llm_cached_aask(req=req, system_msgs=[system_prompt], state_data=state_data)
-
        self.command_rsp = await self._check_duplicates(req, self.command_rsp)

-        self._add_memory(AIMessage(content=self.command_rsp))
        return True

    @exp_cache(context_builder=RoleZeroContextBuilder(), serializer=RoleZeroSerializer())
@ -276,7 +281,8 @@ class RoleZero(Role):
        if self.use_fixed_sop:
            return await super()._act()

-        commands, ok = await self._parse_commands(self.command_rsp)
+        commands, ok, self.command_rsp = await self._parse_commands(self.command_rsp)
+        self._add_memory(AIMessage(content=self.command_rsp))
        if not ok:
            error_msg = commands
            self._add_memory(UserMessage(content=error_msg))
@ -424,12 +430,27 @@ class RoleZero(Role):
            tb = traceback.format_exc()
            print(tb)
            error_msg = str(e)
-            return error_msg, False
+            return error_msg, False, command_rsp

        # 为了对LLM不按格式生成进行容错
        if isinstance(commands, dict):
            commands = commands["commands"] if "commands" in commands else [commands]
-        return commands, True
+
+        # Flag every command: False marks an exclusive command, True marks a regular one.
+        command_flag = [command["command_name"] not in self.exclusive_tool_commands for command in commands]
+        if command_flag.count(False) > 1:
+            # Keep only the first exclusive command; all non-exclusive commands are retained.
+            index_of_first_exclusive = command_flag.index(False)
+            commands = [
+                cmd
+                for index, cmd in enumerate(commands)
+                if index == index_of_first_exclusive or cmd["command_name"] not in self.exclusive_tool_commands
+            ]
+            # Re-serialize the trimmed list; the closing fence must be a bare ``` (no info string).
+            command_rsp = "```json\n" + json.dumps(commands, indent=4, ensure_ascii=False) + "\n```"
+            logger.info(
+                "exclusive command more than one in current command list. change the command list.\n" + command_rsp
+            )
+        return commands, True, command_rsp

    async def _run_commands(self, commands) -> str:
        outputs = []
@ -473,7 +494,9 @@ class RoleZero(Role):
        if cmd["command_name"] == "Plan.finish_current_task":
            if not self.planner.plan.is_plan_finished():
                self.planner.plan.finish_current_task()
-            command_output = "Current task is finished. If all tasks are finished, use 'end' to stop."
+            command_output = (
+                "Current task is finished. If you no longer need to take action, use the command ‘end’ to stop."
+            )

        elif cmd["command_name"] == "end":
            command_output = await self._end()
@ -488,6 +511,7 @@ class RoleZero(Role):
                )
            else:
                command_output += f"\n[command]: {cmd['args']['cmd']} \n[command output] : {tool_output}"
+
        return command_output

    def _get_plan_status(self) -> Tuple[str, str]:
@ -536,7 +560,7 @@ class RoleZero(Role):
            return "Not in MGXEnv, command will not be executed."
        return await self.rc.env.reply_to_human(content, sent_from=self)

-    async def _end(self):
+    async def _end(self, **kwarg):
        self._set_state(-1)
        memory = self._fetch_memories()
        # Ensure reply to the human before the "end" command is executed. Hard code k=5 for checking.
--- a/metagpt/roles/di/swe_agent.py
+++ b/metagpt/roles/di/swe_agent.py
@ -46,7 +46,6 @@ class SWEAgent(RoleZero):
    async def _format_instruction(self):
        """
        Formats the instruction message for the SWE agent.
-
        Runs the "state" command in the terminal, parses its output as JSON,
        and uses it to format the `_instruction` template.
        """
@ -63,10 +62,8 @@ class SWEAgent(RoleZero):
    async def _parse_commands_for_eval(self):
        """
        Handles actions based on parsed commands.
-
        Parses commands, checks for a "submit" action, and generates a patch using `git diff`.
        Stores the cleaned patch in `output_diff`. Logs any exceptions.
-
        This function is specifically added for SWE bench evaluation.
        """
        # If todo switches to None, it indicates that this is the final round of reactions, and the Swe-Agent will stop. Use git diff to store any changes made.
@ -79,7 +76,6 @@ class SWEAgent(RoleZero):
                logger.info(f"Diff output: \n{clear_diff}")
                if clear_diff:
                    self.output_diff = clear_diff
-
            except Exception as e:
                logger.error(f"Error during submission: {e}")

--- a/metagpt/strategy/experience_retriever.py
+++ b/metagpt/strategy/experience_retriever.py
@ -842,7 +842,7 @@ Explanation: I will first need to read the system design document and the projec

 ## example 2
 Consider this example only after you have obtained the content of system design and project schedule documents.
-Suppose the system design and project schedule prescribes three files index.html, style.css, script.js, to follow the design and schedule, I will create a plan consisting of three tasks, each corresponding to the creation of one of the required files: `index.html`, `style.css`, and `script.js`. Following the completion of these tasks, I will add a code review task for each file to ensure the implementation aligns with the provided system design and project schedule documents.
+Suppose the system design and project schedule prescribes three files index.html, style.css, script.js, to follow the design and schedule, I will create a plan consisting of three tasks, each corresponding to the creation of one of the required files: `index.html`, `style.css`, and `script.js`. 

 Here's the plan:

@ -901,8 +901,132 @@ Explanation: Take on one task, such as writing a file. Upon completion, finish c
    }
 ]
 ```
-"""

+## example 4
+I have received a GitHub issue URL.
+I will use browser to review the detailed information of this issue in order to understand the problem.
+```json
+[
+    {
+        "command_name": "Browser.goto",
+        "args": {
+            "url": "https://github.com/geekan/MetaGPT/issues/1275"
+        }
+    }
+]
+```
+
+## example 6
+I need to locate the `openai_api.py` file, so I will search for the `openai_api.py` file.
+```json
+[
+    {
+        "command_name": "Editor.find_file",
+        "args": {
+            "file_name": "openai_api.py"   
+        }
+    }
+]
+```
+
+
+
+## example 7
+I have located the openai_api.py file. I want to edit this file, so I will open it first.
+```json
+[
+    {
+        "command_name": "Editor.open_file",
+        "args": {
+            "path": "/workspace/MetaGPT/provider/openai_api.py"   
+        }
+    }
+]
+```
+
+## example 8
+I have opened the openai_api.py file. However, the range of lines shown is from 001 to 100, and I want to see more. Therefore, I want to use the scroll_down command to view additional lines.
+```json
+[
+    {
+        "command_name": "Editor.scroll_down",
+        "args": {}
+    }
+]
+```
+
+## example 9
+I've found the bug and will start fixing it. I'll pay close attention to the indentation.
+Since I only need to modify a few lines in this file, I will use Editor.edit_file_by_replace. The original content will be replaced by the new code.
+Editor tool is exclusive. If I use this tool, I cannot use any other commands in the current response.
+```json
+[
+    {
+        "command_name": "Editor.edit_file_by_replace",
+        "args": {
+            "file_name":"/workspace/MetaGPT/provider/openai_api.py",
+            "to_replace": "            inv_trig_table = [\\"asin\\", \\"acos\\", \\"atan\\", \\"acot\\"]",
+            "new_content": "            inv_trig_table = [\\"asin\\", \\"acos\\", \\"atan\\", \\"acsc\\", \\"asec\\", \\"acot\\"]"
+        }
+    }
+]
+```
+
+## example 10
+I only need to add a few lines to the file, so I will use Editor.insert_content_at_line. The new code will not cover the original code.
+Note that the Editor command must be executed in a single response, so this step will only involve using the Editor command.
+```json
+[
+    {
+        "command_name": "Editor.insert_content_at_line",
+        "args": {
+            "file_name":"/workspace/MetaGPT/provider/openai_api.py",
+            "line_number":727,
+            "content": "if hasattr(self, '_print_' + func) and not isinstance(expr.func, UndefinedFunction):\\n            return getattr(self, '_print_' + func)(expr, exp)"
+        }
+    }
+]
+```
+
+## example 10.1
+To enhance the functionality of the 2048 game, including game end detection and score tracking, we need to add these features to the existing game_2048.py file. First, we will add a score tracking feature, and then we will insert game end detection logic into the game loop.
+We will use the Editor.insert_content_at_line command to insert new code into the file for adding score tracking and game end detection.
+Since Editor.insert_content_at_line can only be used once per response, this time I will use it to create the variable self.score
+```json
+[
+    {
+        "command_name": "Editor.insert_content_at_line",
+        "args": {
+            "file_name": "/home/mgx/mgx/MetaGPT/workspace/2048_game_py/game_2048.py",
+            "line_number": 4,
+            "content": "        self.score = 0\\n"
+        }
+    }
+]
+```
+In the next turn, I will try to add another code snippet
+
+## example 11
+
+Create a pull request (Optional): Merge the changes from the new branch into the master branch.
+Thought: Now that the changes have been pushed to the remote repository, due to the user's requirement, let's create a pull request to merge the changes into the master branch.
+```json
+[
+    {
+        "command_name": "git_create_pull",
+        "args": {
+            "base": "master",
+            "head": "test-fix",
+            "base_repo_name": "garylin2099/MetaGPT",
+            "head_repo_name": "seeker-jie/MetaGPT",
+            "app_name": "github",
+            "title": "Fix Issue #1275: produced TypeError: openai.types.completion_usage.CompletionUsage() argument after ** must be a mapping, not NoneType",
+            "body": "This pull request addresses issue #1275 by ensuring that chunk.usage is not None before passing it to CompletionUsage."
+        }
+    }
+]
+```
+"""

 WEB_SCRAPING_EXAMPLE = """
 ## action 1
--- a/metagpt/tools/libs/editor.py
+++ b/metagpt/tools/libs/editor.py
@ -3,31 +3,65 @@ This file is borrowed from OpenDevin
 You can find the original repository here:
 https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py
 """
-import base64
 import os
 import re
 import shutil
 import tempfile
 from pathlib import Path
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Union

 from pydantic import BaseModel, ConfigDict

-from metagpt.config2 import Config
 from metagpt.const import DEFAULT_WORKSPACE_ROOT
 from metagpt.logs import logger
+from metagpt.tools.libs.index_repo import IndexRepo
 from metagpt.tools.libs.linter import Linter
 from metagpt.tools.tool_registry import register_tool
-from metagpt.utils import read_docx
-from metagpt.utils.common import aread, aread_bin, awrite_bin, check_http_endpoint
-from metagpt.utils.repo_to_markdown import is_text_file
+from metagpt.utils.file import File
 from metagpt.utils.report import EditorReporter

 # This is also used in unit tests!
-MSG_FILE_UPDATED = "[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]"
 LINTER_ERROR_MSG = "[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n"


+# Template filled in by Editor._get_indentation_info() via str.format(): it quotes the line before
+# the edit and the first edited line, reports their leading-whitespace widths, and suggests two
+# alternative indentation widths.
+INDENTATION_INFO = """
+The previous line is:
+"{pre_line}"
+The indentation has {pre_line_indent} spaces.
+
+The error line is:
+"{insert_line}"
+The indentation has {insert_line_indent} spaces.
+
+Please check the indentation of the code to ensure that it is not causing any errors.
+Try using indentation with either {sub_4_space} or {add_4_space} spaces.
+"""
+
+# Template for the message produced when an edit is rejected: the error text, the file window as it
+# would look with the edit, the window of the original file, and a trailing guidance message.
+ERROR_GUIDANCE = """
+{linter_error_msg}
+
+[This is how your edit would have looked if applied]
+-------------------------------------------------
+{window_after_applied}
+-------------------------------------------------
+
+[This is the original code before your edit]
+-------------------------------------------------
+{window_before_applied}
+-------------------------------------------------
+
+Your changes have NOT been applied. Please fix your edit command and try again
+{guidance_message}
+
+"""
+
+# Template for the message returned after a successful edit: file header, the window around the
+# edited line, and a reminder to review the change.
+SUCCESS_EDIT_INFO = """
+[File: {file_name} ({n_total_lines} lines total after edit)]
+{window_after_applied}
+[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+"""
+
+
 class FileBlock(BaseModel):
    """A block of content in a file"""

@ -70,23 +104,12 @@ class Editor(BaseModel):

    async def read(self, path: str) -> FileBlock:
        """Read the whole content of a file. Using absolute paths as the argument for specifying the file location."""
-        is_text, mime_type = await is_text_file(path)
-        if is_text:
-            lines = await self._read_text(path)
-        elif mime_type == "application/pdf":
-            lines = await self._read_pdf(path)
-        elif mime_type in {
-            "application/msword",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-            "application/vnd.ms-word.document.macroEnabled.12",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
-            "application/vnd.ms-word.template.macroEnabled.12",
-        }:
-            lines = await self._read_docx(path)
-        else:
+        content = await File.read_text_file(path)
+        if not content:
            return FileBlock(file_path=str(path), block_content="")
        self.resource.report(str(path), "path")

+        lines = content.splitlines(keepends=True)
        lines_with_num = [f"{i + 1:03}|{line}" for i, line in enumerate(lines)]
        result = FileBlock(
            file_path=str(path),
@ -94,80 +117,6 @@ class Editor(BaseModel):
        )
        return result

-    @staticmethod
-    async def _read_text(path: Union[str, Path]) -> List[str]:
-        content = await aread(path)
-        lines = content.split("\n")
-        return lines
-
-    @staticmethod
-    async def _read_pdf(path: Union[str, Path]) -> List[str]:
-        result = await Editor._omniparse_read_file(path)
-        if result:
-            return result
-
-        from llama_index.readers.file import PDFReader
-
-        reader = PDFReader()
-        lines = reader.load_data(file=Path(path))
-        return [i.text for i in lines]
-
-    @staticmethod
-    async def _read_docx(path: Union[str, Path]) -> List[str]:
-        result = await Editor._omniparse_read_file(path)
-        if result:
-            return result
-        return read_docx(str(path))
-
-    @staticmethod
-    async def _omniparse_read_file(path: Union[str, Path]) -> Optional[List[str]]:
-        from metagpt.tools.libs import get_env_default
-        from metagpt.utils.omniparse_client import OmniParseClient
-
-        env_base_url = await get_env_default(key="base_url", app_name="OmniParse", default_value="")
-        env_timeout = await get_env_default(key="timeout", app_name="OmniParse", default_value="")
-        conf_base_url, conf_timeout = await Editor._read_omniparse_config()
-
-        base_url = env_base_url or conf_base_url
-        if not base_url:
-            return None
-        api_key = await get_env_default(key="api_key", app_name="OmniParse", default_value="")
-        timeout = env_timeout or conf_timeout or 600
-        try:
-            timeout = int(timeout)
-        except ValueError:
-            timeout = 600
-
-        try:
-            if not await check_http_endpoint(url=base_url):
-                logger.warning(f"{base_url}: NOT AVAILABLE")
-                return None
-            client = OmniParseClient(api_key=api_key, base_url=base_url, max_timeout=timeout)
-            file_data = await aread_bin(filename=path)
-            ret = await client.parse_document(file_input=file_data, bytes_filename=str(path))
-        except (ValueError, Exception) as e:
-            logger.exception(f"{path}: {e}")
-            return None
-        if not ret.images:
-            return [ret.text] if ret.text else None
-
-        result = [ret.text]
-        img_dir = Path(path).parent / (Path(path).name.replace(".", "_") + "_images")
-        img_dir.mkdir(parents=True, exist_ok=True)
-        for i in ret.images:
-            byte_data = base64.b64decode(i.image)
-            filename = img_dir / i.image_name
-            await awrite_bin(filename=filename, data=byte_data)
-            result.append(f"![{i.image_name}]({str(filename)})")
-        return result
-
-    @staticmethod
-    async def _read_omniparse_config() -> Tuple[str, int]:
-        config = Config.default()
-        if config.omniparse and config.omniparse.url:
-            return config.omniparse.url, config.omniparse.timeout
-        return "", 0
-
    @staticmethod
    def _is_valid_filename(file_name: str) -> bool:
        if not file_name or not file_name.strip():
@ -277,7 +226,7 @@ class Editor(BaseModel):
            return ""
        return f"[File: {current_file.resolve()} ({total_lines} lines total)]\n"

-    def set_workdir(self, path: str) -> None:
+    def _set_workdir(self, path: str) -> None:
        """
        Sets the working directory to the given path. eg: repo directory.
        You MUST to set it up before open the file.
@ -321,6 +270,7 @@ class Editor(BaseModel):

        output = self._cur_file_header(path, total_lines)
        output += self._print_window(path, self.current_line, self._clamp(context_lines, 1, 2000))
+        self.resource.report(path, "path")
        return output

    def goto_line(self, line_number: int) -> str:
@ -499,6 +449,25 @@ class Editor(BaseModel):
        content = "".join(new_lines)
        return content, n_added_lines

+    def _get_indentation_info(self, content, first_line):
+        """
+        The indentation of the first insert line and the previous line, along with guidance for the next attempt.
+
+        Args:
+            content: The text to inspect; it is split into lines on newline characters.
+            first_line: 1-based number of the first inserted/edited line within `content`.
+
+        Returns:
+            str: INDENTATION_INFO filled in with both lines, their leading-whitespace widths and two
+                suggested widths (4 less, floored at 0, and 4 more than the edited line).
+        """
+        content_lines = content.split("\n")
+
+        def line_at(index):
+            # This method runs while an error message is being built, so it must never raise on
+            # its own: positions outside the text are reported as an empty line. Negative
+            # positions are rejected too, otherwise they would silently wrap to the end of the text.
+            return content_lines[index] if 0 <= index < len(content_lines) else ""
+
+        pre_line = line_at(first_line - 2)
+        insert_line = line_at(first_line - 1)
+        # Width of the leading whitespace (any whitespace characters, not only spaces).
+        pre_line_indent = len(pre_line) - len(pre_line.lstrip())
+        insert_line_indent = len(insert_line) - len(insert_line.lstrip())
+        return INDENTATION_INFO.format(
+            pre_line=pre_line,
+            pre_line_indent=pre_line_indent,
+            insert_line=insert_line,
+            insert_line_indent=insert_line_indent,
+            sub_4_space=max(insert_line_indent - 4, 0),
+            add_4_space=insert_line_indent + 4,
+        )
+
    def _edit_file_impl(
        self,
        file_name: Path,
@ -518,7 +487,6 @@ class Editor(BaseModel):
            is_insert: bool = False: Whether to insert content at the given line number instead of editing.
            is_append: bool = False: Whether to append content to the file instead of editing.
        """
-        ret_str = ""

        ERROR_MSG = f"[Error editing file {file_name}. Please confirm the file is correct.]"
        ERROR_MSG_SUFFIX = (
@ -568,14 +536,12 @@ class Editor(BaseModel):
                    try:
                        content, n_added_lines = self._insert_impl(lines, start, content)
                    except LineNumberError as e:
-                        ret_str += (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"
-                        return ret_str
+                        return (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"
                else:
                    try:
                        content, n_added_lines = self._edit_impl(lines, start, end, content)
                    except LineNumberError as e:
-                        ret_str += (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"
-                        return ret_str
+                        return (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"

                if not content.endswith("\n"):
                    content += "\n"
@ -622,9 +588,11 @@ class Editor(BaseModel):
                        first_error_line = None

                if lint_error is not None:
-                    if first_error_line is not None:
-                        show_line = int(first_error_line)
-                    elif is_append:
+                    # if first_error_line is not None:
+                    #     show_line = int(first_error_line)
+
+                    # show the first insert line.
+                    if is_append:
                        # original end-of-file
                        show_line = len(lines)
                    # insert OR edit WILL provide meaningful line numbers
@ -633,52 +601,52 @@ class Editor(BaseModel):
                    else:
                        raise ValueError("Invalid state. This should never happen.")

-                    ret_str += LINTER_ERROR_MSG
-                    ret_str += lint_error + "\n"
-
-                    editor_lines = n_added_lines + 20
-
-                    ret_str += "[This is how your edit would have looked if applied]\n"
-                    ret_str += "-------------------------------------------------\n"
-                    ret_str += self._print_window(file_name, show_line, editor_lines, return_str=True) + "\n"
-                    ret_str += "-------------------------------------------------\n\n"
-
-                    ret_str += "[This is the original code before your edit]\n"
-                    ret_str += "-------------------------------------------------\n"
-                    ret_str += (
-                        self._print_window(
-                            original_file_backup_path,
-                            show_line,
-                            editor_lines,
-                        )
-                        + "\n"
-                    )
-                    ret_str += "-------------------------------------------------\n"
-
-                    ret_str += (
-                        "Your changes have NOT been applied. Please fix your edit command and try again.\n"
+                    guidance_message = self._get_indentation_info(content, start or len(lines))
+                    guidance_message += (
                        "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n"
                        "DO NOT re-run the same failed edit command. Running it again will lead to the same error."
                    )
+                    lint_error_info = ERROR_GUIDANCE.format(
+                        linter_error_msg=LINTER_ERROR_MSG + lint_error,
+                        window_after_applied=self._print_window(file_name, show_line, n_added_lines + 20),
+                        window_before_applied=self._print_window(
+                            original_file_backup_path, show_line, n_added_lines + 20
+                        ),
+                        guidance_message=guidance_message,
+                    ).strip()

                    # recover the original file
                    with original_file_backup_path.open() as fin, file_name.open("w") as fout:
                        fout.write(fin.read())
                    original_file_backup_path.unlink()
-                    return ret_str
+                    return lint_error_info

        except FileNotFoundError as e:
-            ret_str += f"File not found: {e}\n"
+            return f"File not found: {e}\n"
        except IOError as e:
-            ret_str += f"An error occurred while handling the file: {e}\n"
+            return f"An error occurred while handling the file: {e}\n"
        except ValueError as e:
-            ret_str += f"Invalid input: {e}\n"
+            return f"Invalid input: {e}\n"
        except Exception as e:
+            guidance_message = self._get_indentation_info(content, start or len(lines))
+            guidance_message += (
+                "You either need to 1) Specify the correct start/end line arguments or 2) Enlarge the range of original code.\n"
+                "DO NOT re-run the same failed edit command. Running it again will lead to the same error."
+            )
+            error_info = ERROR_GUIDANCE.format(
+                linter_error_msg=LINTER_ERROR_MSG + str(e),
+                window_after_applied=self._print_window(file_name, start or len(lines), 40),
+                window_before_applied=self._print_window(original_file_backup_path, start or len(lines), 40),
+                guidance_message=guidance_message,
+            ).strip()
            # Clean up the temporary file if an error occurs
+            with original_file_backup_path.open() as fin, file_name.open("w") as fout:
+                fout.write(fin.read())
            if temp_file_path and Path(temp_file_path).exists():
                Path(temp_file_path).unlink()
-            logger.warning(f"An unexpected error occurred: {e}")
-            raise e
+
+            # logger.warning(f"An unexpected error occurred: {e}")
+            raise Exception(f"{error_info}") from e

        # Update the file information and print the updated content
        with file_name.open("r", encoding="utf-8") as file:
@ -690,11 +658,13 @@ class Editor(BaseModel):
                self.current_line = max(1, len(lines))  # end of original file
            else:
                self.current_line = start or n_total_lines or 1
-        ret_str += f"[File: {file_name.resolve()} ({n_total_lines} lines total after edit)]\n"
-        CURRENT_FILE = file_name
-        ret_str += self._print_window(CURRENT_FILE, self.current_line, self.window) + "\n"
-        ret_str += MSG_FILE_UPDATED.format(line_number=self.current_line)
-        return ret_str
+        success_edit_info = SUCCESS_EDIT_INFO.format(
+            file_name=file_name.resolve(),
+            n_total_lines=n_total_lines,
+            window_after_applied=self._print_window(file_name, self.current_line, self.window),
+            line_number=self.current_line,
+        ).strip()
+        return success_edit_info

    def edit_file_by_replace(self, file_name: str, to_replace: str, new_content: str) -> str:
        """Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`.
@ -741,6 +711,10 @@ class Editor(BaseModel):
            file_name: str: The name of the file to edit.
            to_replace: str: The content to search for and replace.
            new_content: str: The new content to replace the old content with.
+
+        NOTE:
+            This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
+            If you need to use it multiple times, wait for the next turn.
        """
        # FIXME: support replacing *all* occurrences
        if to_replace.strip() == "":
@ -792,6 +766,7 @@ class Editor(BaseModel):
        )
        # lint_error = bool(LINTER_ERROR_MSG in ret_str)
        # TODO: automatically tries to fix linter error (maybe involve some static analysis tools on the location near the edit to figure out indentation)
+        self.resource.report(file_name, "path")
        return ret_str

    def insert_content_at_line(self, file_name: str, line_number: int, content: str) -> str:
@ -816,6 +791,9 @@ class Editor(BaseModel):
            file_name: str: The name of the file to edit.
            line_number: int: The line number (starting from 1) to insert the content after.
            content: str: The content to insert.
+        NOTE:
+            This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
+            If you need to use it multiple times, wait for the next turn.
        """
        file_name = self._try_fix_path(file_name)

@ -836,6 +814,9 @@ class Editor(BaseModel):
        Args:
            file_name: str: The name of the file to edit.
            content: str: The content to insert.
+        NOTE:
+            This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
+            If you need to use it multiple times, wait for the next turn.
        """
        file_name = self._try_fix_path(file_name)

@ -914,6 +895,9 @@ class Editor(BaseModel):
            res_list.append(f'[End of matches for "{search_term}" in {file_path}]')
        else:
            res_list.append(f'[No matches found for "{search_term}" in {file_path}]')
+
+        extra = {"type": "search", "symbol": search_term, "lines": [i[0] - 1 for i in matches]} if matches else None
+        self.resource.report(file_path, "path", extra=extra)
        return "\n".join(res_list)

    def find_file(self, file_name: str, dir_path: str = "./") -> str:
@ -951,3 +935,21 @@ class Editor(BaseModel):
        if not path.is_absolute():
            path = self.working_dir / path
        return path
+
+    @staticmethod
+    async def search_index_repo(query: str, file_or_path: Union[str, Path]) -> List[str]:
+        """Searches the index repository for a given query across specified files or paths.
+
+        This method is a thin wrapper that delegates to `IndexRepo.cross_repo_search`, which
+        classifies the provided files or paths, performing a search on each cluster
+        of files while handling other types of files separately. It merges results from structured
+        indices with any results from non-indexed files.
+
+        Args:
+            query (str): The search query string to look for in the indexed files.
+            file_or_path (Union[str, Path]): A path or a filename to search within.
+
+        Returns:
+            List[str]: A list of search results as strings, containing the text from the merged results
+                        and any direct results from other files.
+
+        Raises:
+            ValueError: Propagated from `IndexRepo.cross_repo_search` when `file_or_path` is empty
+                or does not exist.
+        """
+        return await IndexRepo.cross_repo_search(query=query, file_or_path=file_or_path)
--- a/metagpt/tools/libs/index_repo.py
+++ b/metagpt/tools/libs/index_repo.py
@ -1,9 +1,10 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-
+import asyncio
 import json
+import re
 from pathlib import Path
-from typing import Dict, List, Optional, Set, Union
+from typing import Dict, List, Optional, Set, Tuple, Union

 import tiktoken
 from llama_index.core.base.embeddings.base import BaseEmbedding
@ -16,7 +17,23 @@ from metagpt.rag.engines import SimpleEngine
 from metagpt.rag.factories.embedding import RAGEmbeddingFactory
 from metagpt.rag.schema import FAISSIndexConfig, FAISSRetrieverConfig, LLMRankerConfig
 from metagpt.utils.common import aread, awrite, generate_fingerprint, list_files
-from metagpt.utils.repo_to_markdown import is_text_file
+from metagpt.utils.file import File
+
+# Persist directory of the index repo built for files under UPLOAD_ROOT; also the default
+# `persist_path` of IndexRepo.
+UPLOADS_INDEX_ROOT = "/data/.index/uploads"
+DEFAULT_INDEX_ROOT = UPLOADS_INDEX_ROOT
+# Root directory of the files indexed by the uploads index repo; also the default `root_path`.
+UPLOAD_ROOT = "/data/uploads"
+DEFAULT_ROOT = UPLOAD_ROOT
+# Parent directories for per-chat data: find_index_repo_path() appends the chat id to both to get
+# the index repo path and the matching root directory.
+CHATS_INDEX_ROOT = "/data/.index/chats"
+CHATS_ROOT = "/data/chats/"
+# Cluster key used by find_index_repo_path() for paths that match neither uploads nor chats.
+OTHER_TYPE = "other"
+
+# Default bounds used by IndexRepo._is_buildable(): a token count below the minimum or above the
+# maximum is reported as not buildable.
+DEFAULT_MIN_TOKEN_COUNT = 10000
+DEFAULT_MAX_TOKEN_COUNT = 100000000
+
+
+class IndexRepoMeta(BaseModel):
+    """Token-count bounds of an index repo, stored as JSON next to the index.
+
+    Written by `IndexRepo._save_meta` and loaded by `IndexRepo._read_meta`.
+    """
+
+    min_token_count: int
+    max_token_count: int


 class TextScore(BaseModel):
@ -26,12 +43,15 @@ class TextScore(BaseModel):


 class IndexRepo(BaseModel):
-    persist_path: str  # The persist path of the index repo, {DEFAULT_WORKSPACE_ROOT}/.index/{chat_id or 'uploads'}/
-    root_path: str  # `/data/uploads` or r`/data/chats/\d+`, the root path of files indexed by the index repo.
+    persist_path: str = DEFAULT_INDEX_ROOT  # The persist path of the index repo, `/data/.index/uploads/` or `/data/.index/chats/{chat_id}/`
+    root_path: str = (
+        DEFAULT_ROOT  # `/data/uploads` or r`/data/chats/\d+`, the root path of files indexed by the index repo.
+    )
    fingerprint_filename: str = "fingerprint.json"
+    meta_filename: str = "meta.json"
    model: Optional[str] = None
-    min_token_count: int = 10000
-    max_token_count: int = 100000000
+    min_token_count: int = DEFAULT_MIN_TOKEN_COUNT
+    max_token_count: int = DEFAULT_MAX_TOKEN_COUNT
    recall_count: int = 5
    embedding: Optional[BaseEmbedding] = Field(default=None, exclude=True)
    fingerprints: Dict[str, str] = Field(default_factory=dict)
@ -65,16 +85,21 @@ class IndexRepo(BaseModel):
        """
        encoding = tiktoken.get_encoding("cl100k_base")
        result: List[Union[NodeWithScore, TextScore]] = []
-        filenames, _ = await self._filter(filenames)
+        filenames, excludes = await self._filter(filenames)
+        if not filenames:
+            raise ValueError(f"Unsupported file types: {[str(i) for i in excludes]}")
        filter_filenames = set()
+        meta = await self._read_meta()
        for i in filenames:
-            content = await aread(filename=i)
+            content = await File.read_text_file(i)
            token_count = len(encoding.encode(content))
-            if not self._is_buildable(token_count):
+            if not self._is_buildable(
+                token_count, min_token_count=meta.min_token_count, max_token_count=meta.max_token_count
+            ):
                result.append(TextScore(filename=str(i), text=content))
                continue
            file_fingerprint = generate_fingerprint(content)
-            if self.fingerprints.get(str(i)) != file_fingerprint:
+            if self.fingerprints.get(str(i)) != file_fingerprint and Path(i).suffix.lower() not in {".pdf"}:
                logger.error(f'file: "{i}" changed but not indexed')
                continue
            filter_filenames.add(str(i))
@ -93,6 +118,10 @@ class IndexRepo(BaseModel):
        Returns:
            List[Union[NodeWithScore, TextScore]]: A list of merged results sorted by similarity.
        """
+        flat_nodes = [node for indices in indices_list if indices for node in indices if node]
+        if len(flat_nodes) <= self.recall_count:
+            return flat_nodes
+
        if not self.embedding:
            config = Config.default()
            if self.model:
@ -102,7 +131,6 @@ class IndexRepo(BaseModel):

        scores = []
        query_embedding = await self.embedding.aget_text_embedding(query)
-        flat_nodes = [node for indices in indices_list for node in indices]
        for i in flat_nodes:
            text_embedding = await self.embedding.aget_text_embedding(i.text)
            similarity = self.embedding.similarity(query_embedding, text_embedding)
@ -121,7 +149,7 @@ class IndexRepo(BaseModel):
        filter_filenames = []
        delete_filenames = []
        for i in filenames:
-            content = await aread(filename=i)
+            content = await File.read_text_file(i)
            if not self._is_fingerprint_changed(filename=i, content=content):
                continue
            token_count = len(encoding.encode(content))
@ -169,10 +197,11 @@ class IndexRepo(BaseModel):
            logger.debug(f"add docs {filenames}")
        engine.persist(persist_dir=self.persist_path)
        for i in filenames:
-            content = await aread(i)
+            content = await File.read_text_file(i)
            fp = generate_fingerprint(content)
            self.fingerprints[str(i)] = fp
        await awrite(filename=Path(self.persist_path) / self.fingerprint_filename, data=json.dumps(self.fingerprints))
+        await self._save_meta()

    def __str__(self):
        """Return a string representation of the IndexRepo.
@ -182,7 +211,7 @@ class IndexRepo(BaseModel):
        """
        return f"{self.persist_path}"

-    def _is_buildable(self, token_count: int) -> bool:
+    def _is_buildable(self, token_count: int, min_token_count: int = -1, max_token_count=-1) -> bool:
        """Check if the token count is within the buildable range.

        Args:
@ -191,7 +220,9 @@ class IndexRepo(BaseModel):
        Returns:
            bool: True if buildable, False otherwise.
        """
-        if token_count < self.min_token_count or token_count > self.max_token_count:
+        min_token_count = min_token_count if min_token_count >= 0 else self.min_token_count
+        max_token_count = max_token_count if max_token_count >= 0 else self.max_token_count
+        if token_count < min_token_count or token_count > max_token_count:
            return False
        return True

@ -216,13 +247,13 @@ class IndexRepo(BaseModel):
                logger.debug(f"{path} not is_relative_to {root_path})")
                continue
            if not path.is_dir():
-                is_text, _ = await is_text_file(path)
+                is_text = await File.is_textual_file(path)
                if is_text:
                    pathnames.append(path)
                continue
            subfiles = list_files(path)
            for j in subfiles:
-                is_text, _ = await is_text_file(j)
+                is_text = await File.is_textual_file(j)
                if is_text:
                    pathnames.append(j)

@ -240,7 +271,7 @@ class IndexRepo(BaseModel):
            List[NodeWithScore]: A list of nodes with scores matching the query.
        """
        if not Path(self.persist_path).exists():
-            return []
+            raise ValueError(f"IndexRepo {Path(self.persist_path).name} not exists.")
        engine = SimpleEngine.from_index(
            index_config=FAISSIndexConfig(persist_path=self.persist_path), retriever_configs=[FAISSRetrieverConfig()]
        )
@ -262,3 +293,114 @@ class IndexRepo(BaseModel):
            return True
        fp = generate_fingerprint(content)
        return old_fp != fp
+
+    @staticmethod
+    def find_index_repo_path(files: List[Union[str, Path]]) -> Tuple[Dict[str, Set[Path]], Dict[str, str]]:
+        """Map the file path to the corresponding index repo.
+
+        Each entry is converted to an absolute path and classified by that absolute path: paths
+        under `/data/uploads` go to the uploads index repo, paths under `/data/chats/<chat_id>` go to
+        the index repo of that chat, and everything else is collected under `OTHER_TYPE`.
+
+        Args:
+            files (List[Union[str, Path]]): A list of file paths or Path objects to be classified.
+
+        Returns:
+            Tuple[Dict[str, Set[Path]], Dict[str, str]]:
+                - A dictionary mapping the index repo path to the files.
+                - A dictionary mapping the index repo path to their corresponding root directories.
+        """
+        mappings = {
+            UPLOADS_INDEX_ROOT: re.compile(r"^/data/uploads($|/.*)"),
+            CHATS_INDEX_ROOT: re.compile(r"^/data/chats/\d+($|/.*)"),
+        }
+
+        clusters = {}
+        roots = {}
+        for i in files:
+            path = Path(i).absolute()
+            # Classify the absolute path, not the raw argument: the chat id and the cluster members
+            # below are taken from `path`, so a relative input must be matched in the same form or
+            # it would wrongly end up in the OTHER_TYPE cluster.
+            path_str = str(path)
+            path_type = OTHER_TYPE
+            for type_, pattern in mappings.items():
+                if pattern.match(path_str):
+                    path_type = type_
+                    break
+            if path_type == CHATS_INDEX_ROOT:
+                # parts == ("/", "data", "chats", "<chat_id>", ...), guaranteed by the pattern above.
+                chat_id = path.parts[3]
+                path_type = str(Path(path_type) / chat_id)
+                roots[path_type] = str(Path(CHATS_ROOT) / chat_id)
+            elif path_type == UPLOADS_INDEX_ROOT:
+                roots[path_type] = UPLOAD_ROOT
+
+            clusters.setdefault(path_type, set()).add(path)
+
+        return clusters, roots
+
+    async def _save_meta(self):
+        """Write this repo's min/max token counts as JSON to `persist_path`/`meta_filename`."""
+        meta = IndexRepoMeta(min_token_count=self.min_token_count, max_token_count=self.max_token_count)
+        await awrite(filename=Path(self.persist_path) / self.meta_filename, data=meta.model_dump_json())
+
+    async def _read_meta(self) -> IndexRepoMeta:
+        """Load the token-count bounds stored with the index repo.
+
+        Returns:
+            IndexRepoMeta: The content of `persist_path`/`meta_filename` when that file exists and
+                parses; otherwise a meta object built from this instance's own bounds.
+        """
+        fallback = IndexRepoMeta(min_token_count=self.min_token_count, max_token_count=self.max_token_count)
+
+        meta_path = Path(self.persist_path) / self.meta_filename
+        if meta_path.exists():
+            raw = await aread(filename=meta_path)
+            try:
+                return IndexRepoMeta.model_validate_json(raw)
+            except Exception as e:
+                # A broken meta file is not fatal: report it and use the instance's bounds.
+                logger.warning(f"Load meta error: {e}")
+        return fallback
+
+    @staticmethod
+    async def cross_repo_search(query: str, file_or_path: Union[str, Path]) -> List[str]:
+        """Search for a query across multiple repositories.
+
+        The given file, or every file that `list_files` returns for the given directory, is
+        grouped by `find_index_repo_path`. Files that belong to an index repo are searched
+        through `IndexRepo.search`; all remaining files are read with `File.read_text_file`.
+
+        Args:
+            query (str): The search term to look for in the files.
+            file_or_path (Union[str, Path]): The path to the file or directory
+                where the search should be conducted. This can be a string path
+                or a Path object.
+
+        Returns:
+            List[str]: The texts of the merged index-repo search results, followed by
+            the text read from the files that are not covered by an index repo.
+
+        Raises:
+            ValueError: If `file_or_path` is empty or does not exist.
+        """
+        if not file_or_path or not Path(file_or_path).exists():
+            raise ValueError(f'"{str(file_or_path)}" not exists')
+        files = [file_or_path] if not Path(file_or_path).is_dir() else list_files(file_or_path)
+        clusters, roots = IndexRepo.find_index_repo_path(files)
+        futures = []
+        others = set()
+        for persist_path, filenames in clusters.items():
+            if persist_path == OTHER_TYPE:
+                # Not under any index repo: these files are read directly further below.
+                others.update(filenames)
+                continue
+            root = roots[persist_path]
+            repo = IndexRepo(persist_path=persist_path, root_path=root)
+            futures.append(repo.search(query=query, filenames=list(filenames)))
+
+        for i in others:
+            futures.append(File.read_text_file(i))
+
+        futures_results = []
+        if futures:
+            # Run the index searches and the direct file reads concurrently.
+            futures_results = await asyncio.gather(*futures)
+
+        result = []
+        v_result = []
+        for i in futures_results:
+            # A plain string is the text of a directly read file; anything else is handed to merge().
+            if isinstance(i, str):
+                result.append(i)
+            else:
+                v_result.append(i)
+
+        # A default-configured repo is only used here to merge the per-repo results.
+        repo = IndexRepo()
+        merged = await repo.merge(query=query, indices_list=v_result)
+        return [i.text for i in merged] + result
--- a/metagpt/utils/file.py
+++ b/metagpt/utils/file.py
@ -6,13 +6,19 @@
@File    : file.py
@Describe : General file operations.
 """
+import base64
 from pathlib import Path
+from typing import Optional, Tuple, Union

 import aiofiles
 from fsspec.implementations.memory import MemoryFileSystem as _MemoryFileSystem

+from metagpt.config2 import Config
 from metagpt.logs import logger
+from metagpt.utils import read_docx
+from metagpt.utils.common import aread, aread_bin, awrite_bin, check_http_endpoint
 from metagpt.utils.exceptions import handle_exception
+from metagpt.utils.repo_to_markdown import is_text_file


 class File:
@ -70,6 +76,125 @@ class File:
            logger.debug(f"Successfully read file, the path of file: {file_path}")
            return content

+    @staticmethod
+    async def is_textual_file(filename: Union[str, Path]) -> bool:
+        """Report whether a file's content can be read as text.
+
+        Plain-text files qualify, and so do files whose detected MIME type is
+        PDF or one of the Microsoft Word types listed in the body.
+
+        Args:
+            filename (Union[str, Path]): Path of the file to inspect.
+
+        Returns:
+            bool: True for plain text, PDF or Word documents, False otherwise.
+        """
+        word_mime_types = {
+            "application/msword",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "application/vnd.ms-word.document.macroEnabled.12",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
+            "application/vnd.ms-word.template.macroEnabled.12",
+        }
+        plain_text, detected_mime = await is_text_file(filename)
+        if plain_text or detected_mime == "application/pdf":
+            return True
+        return detected_mime in word_mime_types
+
+    @staticmethod
+    async def read_text_file(filename: Union[str, Path]) -> Optional[str]:
+        """Read the whole content of a file as text; pass an absolute path to specify the file location.
+
+        The reader is chosen from the detection result of ``is_text_file``: plain text,
+        PDF, or one of the Microsoft Word MIME types.
+
+        Returns:
+            Optional[str]: The text content, or None when the file is none of those types.
+        """
+        plain_text, detected_mime = await is_text_file(filename)
+        if plain_text:
+            reader = File._read_text
+        elif detected_mime == "application/pdf":
+            reader = File._read_pdf
+        elif detected_mime in {
+            "application/msword",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "application/vnd.ms-word.document.macroEnabled.12",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
+            "application/vnd.ms-word.template.macroEnabled.12",
+        }:
+            reader = File._read_docx
+        else:
+            return None
+        return await reader(filename)
+
+    @staticmethod
+    async def _read_text(path: Union[str, Path]) -> str:
+        """Return the content of the plain-text file at ``path``, as read by ``aread``."""
+        content = await aread(path)
+        return content
+
+    @staticmethod
+    async def _read_pdf(path: Union[str, Path]) -> str:
+        """Extract the text of a PDF file.
+
+        A non-empty OmniParse result is returned as is. Otherwise the file is loaded
+        with llama_index's ``PDFReader`` and the ``text`` of every returned item is
+        joined with newlines.
+        """
+        parsed = await File._omniparse_read_file(path)
+        if parsed:
+            return parsed
+
+        # Imported lazily: only the fallback path needs llama_index.
+        from llama_index.readers.file import PDFReader
+
+        documents = PDFReader().load_data(file=Path(path))
+        return "\n".join(doc.text for doc in documents)
+
+    @staticmethod
+    async def _read_docx(path: Union[str, Path]) -> str:
+        """Extract the text of a Word document.
+
+        A non-empty OmniParse result wins; otherwise the items returned by
+        ``read_docx`` are joined with newlines.
+        """
+        parsed = await File._omniparse_read_file(path)
+        return parsed if parsed else "\n".join(read_docx(str(path)))
+
+    @staticmethod
+    async def _omniparse_read_file(path: Union[str, Path], auto_save_image: bool = False) -> Optional[str]:
+        """Parse a document through an OmniParse service, if one is configured and reachable.
+
+        The base URL and timeout are looked up with ``get_env_default`` (app name
+        "OmniParse") first and fall back to the values from ``_read_omniparse_config``.
+        The timeout falls back to 600 when it is unset or cannot be converted to an int.
+
+        Args:
+            path (Union[str, Path]): The file to parse.
+            auto_save_image (bool): When True and the parse result carries images, each
+                image is base64-decoded and written into a sibling directory named after
+                the file (dots replaced by underscores, suffixed with "_images"), and a
+                Markdown image link per image is appended to the returned text.
+
+        Returns:
+            Optional[str]: The parsed text (plus image links when requested), or None when
+            no base URL is configured, the endpoint check fails, or parsing raises.
+        """
+        from metagpt.tools.libs import get_env_default
+        from metagpt.utils.omniparse_client import OmniParseClient
+
+        env_base_url = await get_env_default(key="base_url", app_name="OmniParse", default_value="")
+        env_timeout = await get_env_default(key="timeout", app_name="OmniParse", default_value="")
+        conf_base_url, conf_timeout = await File._read_omniparse_config()
+
+        base_url = env_base_url or conf_base_url
+        if not base_url:
+            return None
+        api_key = await get_env_default(key="api_key", app_name="OmniParse", default_value="")
+        try:
+            timeout = int(env_timeout or conf_timeout or 600)
+        except (TypeError, ValueError):
+            # A malformed timeout setting must not abort the read; use the default instead.
+            timeout = 600
+
+        try:
+            if not await check_http_endpoint(url=base_url):
+                logger.warning(f"{base_url}: NOT AVAILABLE")
+                return None
+            client = OmniParseClient(api_key=api_key, base_url=base_url, max_timeout=timeout)
+            file_data = await aread_bin(filename=path)
+            ret = await client.parse_document(file_input=file_data, bytes_filename=str(path))
+        except Exception as e:
+            # Best effort: any failure is logged and reported as None so callers can fall back.
+            logger.exception(f"{path}: {e}")
+            return None
+        if not ret.images or not auto_save_image:
+            return ret.text
+
+        result = [ret.text]
+        img_dir = Path(path).parent / (Path(path).name.replace(".", "_") + "_images")
+        img_dir.mkdir(parents=True, exist_ok=True)
+        for i in ret.images:
+            byte_data = base64.b64decode(i.image)
+            filename = img_dir / i.image_name
+            await awrite_bin(filename=filename, data=byte_data)
+            result.append(f"![{i.image_name}]({str(filename)})")
+        return "\n".join(result)
+
+    @staticmethod
+    async def _read_omniparse_config() -> Tuple[str, int]:
+        """Return ``(url, timeout)`` from the default config's ``omniparse`` section.
+
+        ``("", 0)`` is returned when that section or its URL is not set.
+        """
+        omniparse = Config.default().omniparse
+        if not omniparse or not omniparse.url:
+            return "", 0
+        return omniparse.url, omniparse.timeout
+

 class MemoryFileSystem(_MemoryFileSystem):
    @classmethod
--- a/tests/metagpt/environment/mgx_env/run_mgx_env.py
+++ b/tests/metagpt/environment/mgx_env/run_mgx_env.py
@ -8,7 +8,6 @@ from metagpt.environment.mgx.mgx_env import MGXEnv
 from metagpt.roles import Architect, Engineer, ProductManager, ProjectManager
 from metagpt.roles.di.data_analyst import DataAnalyst
 from metagpt.roles.di.engineer2 import Engineer2
-from metagpt.roles.di.swe_agent import SWEAgent
 from metagpt.roles.di.team_leader import TeamLeader
 from metagpt.schema import Message

@ -29,7 +28,6 @@ async def main(requirement="", enable_human_input=False, use_fixed_sop=False, al
            engineer,
            # QaEngineer(),
            DataAnalyst(),
-            SWEAgent(),
        ]
    )

--- a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py
+++ b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py
@ -1,16 +1,23 @@
+import argparse
 import asyncio
 import json
+import os
+import shutil
+import sys
 from datetime import datetime
+from pathlib import Path

 from metagpt.config2 import Config
 from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT
 from metagpt.logs import logger
-from metagpt.roles.di.swe_agent import SWEAgent
+from metagpt.roles.di.engineer2 import Engineer2
+from metagpt.tools.libs.editor import Editor
 from metagpt.tools.libs.terminal import Terminal
 from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset

 config = Config.default()
 # Specify by yourself
+GLOBAL_TERMINAL = Terminal()
 TEST_REPO_DIR = METAGPT_ROOT / "data" / "test_repo"
 DATA_DIR = METAGPT_ROOT / "data/hugging_face"

@ -51,20 +58,61 @@ def check_instance_status(instance, swe_result_dir):
    return True


-async def run(instance, swe_result_dir):
+async def terminal_run_command(cmd):
+    """Run ``cmd`` on the shared ``GLOBAL_TERMINAL``, log the command with its output, and return the output."""
+    output = await GLOBAL_TERMINAL.run_command(cmd)
+    logger.info(f"command:{cmd} output:\n {output}")
+    return output
+
+
+async def refresh_repo(instance, test_repo_dir, reclone_existing_repo=False):
+    """Prepare the working copy of the repository that a benchmark instance targets.
+
+    The working copy lives under ``test_repo_dir`` in a directory named from the
+    instance's ``repo`` and ``version`` fields. An existing copy is hard-reset,
+    cleaned and switched to the remote's HEAD branch; a missing copy is cloned from
+    GitHub and, when the instance provides ``base_commit``, checked out at it.
+
+    The commands are issued through ``terminal_run_command`` one after another and
+    later ones reuse the working directory and the ``BRANCH`` variable set by
+    earlier ones.
+
+    Args:
+        instance: Mapping with the keys "repo", "version" and "base_commit".
+        test_repo_dir: Directory that holds the working copies.
+        reclone_existing_repo: When True, an existing working copy is deleted first
+            so that it is cloned again.
+
+    Returns:
+        Path: The path of the working copy.
+    """
+    repo_path = Path(test_repo_dir) / (
+        instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]
+    )
+    repo_identifier = instance["repo"]
+    base_commit = instance["base_commit"]
+    if os.path.exists(repo_path) and reclone_existing_repo is True:
+        logger.info(f"remove exist repo path:{repo_path}")
+        shutil.rmtree(repo_path)
+
+    if os.path.exists(repo_path):
+        logger.info(f"reset exist repo path:{repo_path}")
+        await terminal_run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d")
+        await terminal_run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')")
+        await terminal_run_command("echo $BRANCH")
+        await terminal_run_command('git checkout "$BRANCH"')
+    else:
+        logger.info(f"clone repo to path:{repo_path}")
+        clone_command = f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}"
+        # Always enter the fresh clone so the commands below run inside it, and
+        # interpolate the commit hash instead of sending a literal "{base_commit}".
+        checkout_command = f"cd {repo_path}"
+        if base_commit:
+            checkout_command += f" && git checkout -f {base_commit}"
+        await terminal_run_command(clone_command)
+        await terminal_run_command(checkout_command)
+
+    await terminal_run_command("git branch")
+    # ignore backup file
+    await terminal_run_command("echo '.backup.*' >> .gitignore")
+
+    return repo_path
+
+
+async def get_git_diff():
+    """Stage every change with ``git add -A`` and return the output of ``git diff --cached``.
+
+    Any exception raised by either command is logged and an empty string is returned.
+    """
+    try:
+        await terminal_run_command("git add -A")
+        return await terminal_run_command("git diff --cached")
+    except Exception as e:
+        logger.error(f"Error during submission: {e}")
+        return ""
+
+
+async def run(instance, swe_result_dir, args):
+    """Run one benchmark instance with Engineer2 and save the resulting repository diff.
+
+    Instances rejected by ``check_instance_status`` are skipped. Otherwise the repository is
+    prepared with ``refresh_repo``, the agent runs under an optional time limit, and
+    ``save_predictions`` is called even when the agent run raises or times out.
+
+    Args:
+        instance: The dataset record to solve.
+        swe_result_dir: Directory that receives the predictions.
+        args: Parsed command-line arguments; ``test_repo_dir``, ``reclone_existing_repo`` and
+            ``max_wait_time_per_case`` (minutes, may be None for no limit) are read.
+    """
    if not check_instance_status(instance, swe_result_dir):
        logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.")
        return

-    repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"])
-
-    # 前处理
-    terminal = Terminal()
-    await terminal.run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d")
-    await terminal.run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')")
-    logger.info(await terminal.run_command("echo $BRANCH"))
-    logger.info(await terminal.run_command('git checkout "$BRANCH"'))
-    logger.info(await terminal.run_command("git branch"))
+    # preparation for the repo
+    logger.info(f"**** Preparing to run {instance['instance_id']}****")
+    test_repo_dir = args.test_repo_dir
+    repo_path = await refresh_repo(instance, test_repo_dir, args.reclone_existing_repo)

    user_requirement_and_issue = INSTANCE_TEMPLATE.format(
        issue=instance["problem_statement"],
@ -75,18 +123,22 @@ async def run(instance, swe_result_dir):
    )

    logger.info(f"**** Starting to run {instance['instance_id']}****")
-    swe_agent = SWEAgent()
-    swe_agent.run_eval = True
-    await swe_agent.run(user_requirement_and_issue)
-    save_predictions(swe_agent, instance, swe_result_dir)
+    # Embed the requirement in the message itself; a positional argument without a placeholder is not logged.
+    logger.info(f"User Requirement:\n{user_requirement_and_issue}")
+    # Built outside the try block so that save_predictions below always receives a defined engineer.
+    engineer = Engineer2(run_eval=True, editor=Editor(enable_auto_lint=True))
+    # No time limit when --max_wait_time_per_case is omitted; wait_for accepts timeout=None.
+    max_minutes = args.max_wait_time_per_case
+    timeout = max_minutes * 60 if max_minutes is not None else None
+    try:
+        await asyncio.wait_for(engineer.run(user_requirement_and_issue), timeout=timeout)
+    except Exception as e:
+        logger.warning(f"**** exception lead to end: {instance['instance_id']}****\n\nerror:{e}")
+    # save the difference of repo
+    await save_predictions(engineer, instance, swe_result_dir)
    logger.info(f"**** Finished running {instance['instance_id']}****")


-def save_predictions(swe_agent: SWEAgent, instance, swe_result_dir):
+async def save_predictions(engineer, instance, swe_result_dir):
    output_file = swe_result_dir / "all_preds.jsonl"
-    instance["model_name_or_path"] = swe_agent.config.llm.model
-    instance["model_patch"] = swe_agent.output_diff
-
+    instance["model_name_or_path"] = engineer.config.llm.model
+    instance["model_patch"] = await get_git_diff()
+    logger.info(f"'model_patch':\n{instance['model_patch']}")
    logger.info(f"Preparing to save predictions to {output_file}")

    # Save the predictions to a JSONL file
@ -96,19 +148,61 @@ def save_predictions(swe_agent: SWEAgent, instance, swe_result_dir):
    logger.info(f"Saved prediction of {instance['instance_id']} to {output_file}")


-async def async_main():
+async def async_main(args):
+    """Run every instance of the benchmark dataset, one after another.
+
+    The "test" split of the dataset is loaded with ``load_hf_dataset`` (cached under
+    ``DATA_DIR``). Results go to ``args.save_folder``, which is created when missing.
+
+    Args:
+        args: Parsed command-line arguments; ``save_folder`` is read here and the whole
+            object is forwarded to ``run``.
+    """
    dataset_path = "manna-ai/SWE-bench_Nano"  # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano"
-
    dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test")
-    date_time = datetime.now().strftime("%m%d")
-    _round = "first"
-    # _round = "second"
-    exp_name = f"nano_mgx_{date_time}_{_round}"
-    swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}" / exp_name
+    swe_result_dir = Path(args.save_folder)
+    if swe_result_dir.exists():
+        logger.info(f"{swe_result_dir} exists; resuming test from last checkpoint.")
    swe_result_dir.mkdir(parents=True, exist_ok=True)
-    for instance in dataset:
-        await run(instance, swe_result_dir)
+    for index, instance in enumerate(dataset):
+        # switch to a new logger file
+        # All existing handlers are dropped, then stderr (INFO) and a per-instance file
+        # under "<save_folder>/logs" (DEBUG) are attached.
+        logger.remove()
+        logger.add(sys.stderr, level="INFO")
+        logger.add(swe_result_dir / "logs" / f"{index+1}_{instance['instance_id']}.log", level="DEBUG")
+        await run(instance, swe_result_dir, args)


 if __name__ == "__main__":
-    asyncio.run(async_main())
+    # Command-line entry point: parse the options, then run the benchmark.
+    parser = argparse.ArgumentParser(description="the argument of scripts")
+    # Add the arguments
+    # Default result folder: named after the configured model and the start time.
+    swe_result_dir = (
+        DEFAULT_WORKSPACE_ROOT
+        / f"result_{config.llm.model.replace('/', '_')}_start_time_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S') }"
+    )
+    test_repo_dir = TEST_REPO_DIR.absolute()
+    swe_result_dir = swe_result_dir.absolute()
+    parser.add_argument(
+        "-rw", "--test_repo_dir", default=test_repo_dir, help="The directory to save temporary repositories", type=str
+    )
+    parser.add_argument("-s", "--save_folder", default=swe_result_dir, help="Folder to save results and logs", type=str)
+    # NOTE(review): no default is declared here, so args.max_wait_time_per_case is None when the flag is omitted.
+    parser.add_argument(
+        "-mwtc", "--max_wait_time_per_case", help="Maximum wait time allowed per test case (in minutes)", type=int
+    )
+    parser.add_argument(
+        "-o",
+        "--reclone_existing_repo",
+        action="store_true",
+        help="If set, the existing repository will be removed and recloned.",
+    )
+    # Parse the command-line arguments
+    args = parser.parse_args()
+    asyncio.run(async_main(args))
+
+
+"""
+# Run the benchmark, reusing any repositories that are already cloned
+python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
+--test_repo_dir "./data/test_repo" \
+--save_folder "./workspace/deepseek_coder_0907" \
+--max_wait_time_per_case 10 
+"""
+
+"""
+# Remove and re-clone the existing repositories before running
+python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
+--test_repo_dir "./data/test_repo" \
+--save_folder "./workspace/deepseek_coder_0907" \
+--max_wait_time_per_case 10 \
+--reclone_existing_repo
+"""
--- a/tests/metagpt/tools/libs/test_editor.py
+++ b/tests/metagpt/tools/libs/test_editor.py
@ -1,7 +1,19 @@
+import os
+import shutil
+from pathlib import Path
+
 import pytest

 from metagpt.const import TEST_DATA_PATH
 from metagpt.tools.libs.editor import Editor
+from metagpt.tools.libs.index_repo import (
+    CHATS_INDEX_ROOT,
+    CHATS_ROOT,
+    UPLOAD_ROOT,
+    UPLOADS_INDEX_ROOT,
+    IndexRepo,
+)
+from metagpt.utils.common import list_files

 TEST_FILE_CONTENT = """
 # this is line one
@ -645,5 +657,54 @@ def test_append_to_single_empty_line_file():
    assert n_added_lines == 1


+async def mock_index_repo():
+    """Build the data used by the index-repo search test.
+
+    The "requirements" test-data folder is copied under chat "1" and under the upload
+    root, and the .md/.txt/.json/.pdf files found in each copy are added to an
+    ``IndexRepo``. The source folder itself is left unindexed.
+
+    Returns:
+        tuple: ``(chat_path, UPLOAD_ROOT, src_path)``.
+    """
+    doc_suffixes = {".md", ".txt", ".json", ".pdf"}
+
+    def pick_documents(folder):
+        # Keep only the file types that the test feeds to the index.
+        return [name for name in list_files(folder) if Path(name).suffix in doc_suffixes]
+
+    src_path = TEST_DATA_PATH / "requirements"
+
+    # Chat-scoped copy and index.
+    chat_id = "1"
+    chat_path = Path(CHATS_ROOT) / chat_id
+    chat_path.mkdir(parents=True, exist_ok=True)
+    os.system(f"cp -rf {str(src_path)} {str(chat_path)}")
+    chat_files = pick_documents(chat_path)
+    chat_index_path = str(Path(CHATS_INDEX_ROOT) / chat_id)
+    chat_repo = IndexRepo(persist_path=chat_index_path, root_path=str(chat_path), min_token_count=0)
+    await chat_repo.add(chat_files)
+    assert chat_files
+
+    # Upload-scoped copy and index.
+    Path(UPLOAD_ROOT).mkdir(parents=True, exist_ok=True)
+    os.system(f"cp -rf {str(src_path)} {str(UPLOAD_ROOT)}")
+    uploads_files = pick_documents(UPLOAD_ROOT)
+    uploads_repo = IndexRepo(persist_path=UPLOADS_INDEX_ROOT, root_path=UPLOAD_ROOT, min_token_count=0)
+    await uploads_repo.add(uploads_files)
+    assert uploads_files
+
+    # Files outside both roots: only checked for presence.
+    other_files = pick_documents(src_path)
+    assert other_files
+
+    return chat_path, UPLOAD_ROOT, src_path
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+async def test_index_repo():
+    """Search the chat copy, the upload copy and the unindexed source folder; each must return a result."""
+    # mock data
+    chat_path, upload_root, src_path = await mock_index_repo()
+
+    editor = Editor()
+    for target in (chat_path, upload_root, src_path):
+        rsp = await editor.search_index_repo(query="业务线", file_or_path=target)
+        assert rsp
+
+    # Remove the folders created by mock_index_repo.
+    shutil.rmtree(CHATS_ROOT)
+    shutil.rmtree(upload_root)
+
+
 if __name__ == "__main__":
    pytest.main([__file__, "-s"])
--- a/tests/metagpt/tools/libs/test_index_repo.py
+++ b/tests/metagpt/tools/libs/test_index_repo.py
@ -1,11 +1,17 @@
 import shutil
+from pathlib import Path

 import pytest

 from metagpt.const import DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
-from metagpt.tools.libs.index_repo import IndexRepo
+from metagpt.tools.libs.index_repo import (
+    CHATS_INDEX_ROOT,
+    UPLOADS_INDEX_ROOT,
+    IndexRepo,
+)


+@pytest.mark.skip
@pytest.mark.asyncio
@pytest.mark.parametrize(("path", "query"), [(TEST_DATA_PATH / "requirements", "业务线")])
 async def test_index_repo(path, query):
@ -28,5 +34,22 @@ async def test_index_repo(path, query):
    shutil.rmtree(index_path)


+@pytest.mark.parametrize(
+    ("paths", "path_type", "root"),
+    [
+        (["/data/uploads"], UPLOADS_INDEX_ROOT, "/data/uploads"),
+        (["/data/uploads/"], UPLOADS_INDEX_ROOT, "/data/uploads"),
+        (["/data/chats/1/1.txt"], str(Path(CHATS_INDEX_ROOT) / "1"), "/data/chats/1"),
+        (["/data/chats/1/2.txt"], str(Path(CHATS_INDEX_ROOT) / "1"), "/data/chats/1"),
+        (["/data/chats/2/2.txt", "/data/chats/2/2.txt"], str(Path(CHATS_INDEX_ROOT) / "2"), "/data/chats/2"),
+        (["/data/chats.txt"], "other", ""),
+    ],
+)
+def test_classify_path(paths, path_type, root):
+    """Check that ``IndexRepo.classify_path`` files the paths under the expected key and reports the expected root."""
+    clusters, cluster_roots = IndexRepo.classify_path(paths)
+    assert path_type in clusters.keys()
+    # A key without a recorded root is compared as the empty string.
+    assert cluster_roots.get(path_type, "") == root
+
+
 if __name__ == "__main__":
    pytest.main([__file__, "-s"])