From c2f9f010b7ef4eb13a641fb9b5a201d3704ae585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Thu, 5 Sep 2024 16:48:46 +0800 Subject: [PATCH] update_engineer_and_editorprompt --- metagpt/environment/mgx/mgx_env.py | 2 +- metagpt/prompts/di/engineer2.py | 23 +-- metagpt/prompts/di/role_zero.py | 1 + metagpt/prompts/di/team_leader.py | 2 - metagpt/roles/di/engineer2.py | 92 +++-------- metagpt/roles/di/role_zero.py | 50 +++--- metagpt/strategy/experience_retriever.py | 4 +- metagpt/tools/libs/editor.py | 151 +++++++++++------- .../environment/mgx_env/run_mgx_env.py | 2 - .../roles/di/run_swe_agent_for_benchmark.py | 143 ++++++++++++----- 10 files changed, 251 insertions(+), 219 deletions(-) diff --git a/metagpt/environment/mgx/mgx_env.py b/metagpt/environment/mgx/mgx_env.py index 8bb3fc823..4df04d3ce 100644 --- a/metagpt/environment/mgx/mgx_env.py +++ b/metagpt/environment/mgx/mgx_env.py @@ -96,7 +96,7 @@ class MGXEnv(Environment, SerializationMixin): async def reply_to_human(self, content: str, sent_from: Role = None) -> str: # NOTE: Can be overwritten in remote setting - return "SUCCESS, human has received your reply. Refrain from resending duplicate messages." + return "SUCCESS, human has received your reply. Refrain from resending duplicate messages. If you no longer need to take action, use the command ‘end’ to stop." def message_within_software_sop(self, message: Message) -> bool: # Engineer, QaEngineer can be end of the SOP. Their msg requires routing outside. diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 3108a7cbd..113551212 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -1,4 +1,4 @@ -from metagpt.prompts.di.role_zero import CMD_PROMPT, ROLE_INSTRUCTION +from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION EXTRA_INSTRUCTION = """ You are an autonomous programmer @@ -58,7 +58,7 @@ Note: - Enclose terms like `def` or `class` in quotes when searching for functions or classes (e.g., `search_dir 'def apow'` or `search_file 'class Pow'`). - Use wildcard characters (`*`, `?`) in search terms to broaden or narrow down your search scope. - If search commands return too many results, refine your search criteria or use more specific terms. - - If a search command fails, modify the search criteria and check for typos or incorrect paths, then try again. + - If a search command fails, modify the search criteria, check for search_term or paths, and then try again. - Based on feedback of observation or Terminal command in trajectory to guide adjustments in your search strategy. 9. When the edit fails, try to enlarge the range of code. @@ -75,23 +75,16 @@ Note: 18. Use Engineer2.write_new_code to create or modify a file. Write only one code file each time. If you only need to code one file, provide all the necessary information in one response. 19. When the requirement is simple, you don't need to create a plan, just do it right away. 20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code. -21. Aways user absolute path as parameter. if no specific root path given, use "workspace/'project_name'" as default work space. -22. Forbidden to run code in the terminal. +21. Forbidden to run code in the terminal. +22. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory. """ -ENGINEER2_CMD_PROMPT = ( - CMD_PROMPT - + "\nUsing Editor.insert_content_at_line and Editor.edit_file_by_replace more than once in the current command list is forbidden. Because the command is mutually exclusive and will change the line number after execution." -) -CURRENT_EDITOR_STATE = """ +CURRENT_STATE = """ The current editor state is: -(Open file: {open_file}) -(Current directory: {working_dir}) -""" - -CURRENT_TERMINAL_STATE = """ +(Editor current directory: {editor_current_directory}) +(Editor open file: {editor_open_file}) The current terminal state is: -(Current directory: {working_dir}) +(Terminal current directory: {terminal_current_directory}) """ ENGINEER2_INSTRUCTION = ROLE_INSTRUCTION + EXTRA_INSTRUCTION.strip() diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 3029735ba..a6b111d95 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -71,6 +71,7 @@ Pay close attention to the Example provided, you can reuse the example for your You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially. If you finish current task, you will automatically take the next task in the existing plan, use Plan.finish_task, DON'T append a new task. Review the latest plan's outcome, focusing on achievements. If your completed task matches the current, consider it finished. +Using Editor.insert_content_at_line and Editor.edit_file_by_replace more than once in the current command list is forbidden. Because the command is mutually exclusive and will change the line number after execution. In your response, include at least one command. # Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command: diff --git a/metagpt/prompts/di/team_leader.py b/metagpt/prompts/di/team_leader.py index e5c119dc8..8d85a8cf3 100644 --- a/metagpt/prompts/di/team_leader.py +++ b/metagpt/prompts/di/team_leader.py @@ -24,7 +24,6 @@ Note: - XL: Social media platform, e-commerce app, real-time multiplayer game - For XS and S requirements, you don't need the standard software development process, you may directly ask Engineer to write the code. Otherwise, estimate if any part of the standard software development process may contribute to a better final code. If so, assign team members accordingly. 3.1 If the task involves code review (CR) or code checking, you should assign it to Engineer. -3.2. If the requirement is to fix a bug or issue, you should assign it to Issue Solver. However, if the code is written by Engineer, Engineer must maintain the code. 4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members. 5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear. 6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer. @@ -43,7 +42,6 @@ Sixth, describe the requirements as they pertain to software development, data a Seventh, describe the technologies you must use. """ ) - TL_INFO = """ {role_info} Your team member: diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index 92ecb633d..9ec22c077 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -1,18 +1,14 @@ from __future__ import annotations -import os from pathlib import Path from pydantic import Field -from metagpt.config2 import Config from metagpt.logs import logger # from metagpt.actions.write_code_review import ValidateAndRewriteCode from metagpt.prompts.di.engineer2 import ( - CURRENT_EDITOR_STATE, - CURRENT_TERMINAL_STATE, - ENGINEER2_CMD_PROMPT, + CURRENT_STATE, ENGINEER2_INSTRUCTION, WRITE_CODE_PROMPT, WRITE_CODE_SYSTEM_PROMPT, @@ -33,7 +29,6 @@ class Engineer2(RoleZero): profile: str = "Engineer" goal: str = "Take on game, app, and web development." instruction: str = ENGINEER2_INSTRUCTION - cmd_prompt: str = ENGINEER2_CMD_PROMPT terminal: Terminal = Field(default_factory=Terminal, exclude=True) tools: list[str] = [ @@ -58,52 +53,38 @@ class Engineer2(RoleZero): async def _format_instruction(self): """ - Formats the instruction message for the Engineer2. - Uses Editor's state to format the `_instruction` template. + Display the current terminal and editor state. + This information will be dynamically added to the command prompt. """ - bash_working_dir = await self.terminal.run_command("pwd") - bash_state = {"working_dir": bash_working_dir} - editor_state = {"open_file": self.editor.current_file, "working_dir": self.editor.working_dir} - self.cmd_prompt_current_state = CURRENT_EDITOR_STATE.format( - **editor_state - ).strip() + CURRENT_TERMINAL_STATE.format(**bash_state) + state = { + "editor_open_file": self.editor.current_file, + "editor_current_directory": self.editor.working_dir, + "terminal_current_directory": await self.terminal.run_command("pwd"), + } + self.cmd_prompt_current_state = CURRENT_STATE.format(**state).strip() def _update_tool_execution(self): self.tool_execution_map.update( { - "Terminal.run_command": self.eval_terminal_run if self.run_eval else self.terminal.run_command, + "Terminal.run_command": self.terminal.run_command, "git_create_pull": git_create_pull, "Engineer2.write_new_code": self.write_new_code, # "ValidateAndRewriteCode.run": validate.run, # "ValidateAndRewriteCode": validate.run, } ) + self.exclusive_tool_commands.append("Engineer2.write_new_code") if self.run_eval: self.tool_execution_map.update( { + "Terminal.run_command": self._eval_terminal_run, "RoleZero.ask_human": self._end, "RoleZero.reply_to_human": self._end, } ) - async def eval_terminal_run(self, cmd): - """change command pull/push/commit to end.""" - if any([cmd_key_word in cmd for cmd_key_word in ["pull", "push", "commit"]]): - # The Engineer2 attempts to submit the repository after fixing the bug, thereby reaching the end of the fixing process. - # Set self.rc.todo to None to stop the engineer and then will trigger _save_git_diff funcion to save difference. - logger.info("Engineer2 use cmd:{cmd}") - logger.info("Current test case is finished.") - # stop the Engineer2 - self._set_state(-1) - command_output = "Current test case is finished." - else: - command_output = await self.terminal.run_command(cmd) - return command_output - async def _act(self) -> Message: message = await super()._act() - if self.run_eval: - await self._save_git_diff() return message def _retrieve_experience(self) -> str: @@ -119,44 +100,6 @@ class Engineer2(RoleZero): command_output += await super()._run_special_command(cmd) return command_output - async def _save_git_diff(self): - """ - Handles actions based on parsed commands. - - When detecting engineer2 at the final action round, the process will stop immediately. - generates a patch using `git diff`. - Stores the cleaned patch in `output_diff`. Logs any exceptions. - - This function is specifically added for SWE bench evaluation. - """ - # If todo switches to None, it indicates that this is the final round of reactions, and the Engineer2 will stop. Use git diff to store any changes made. - if not self.rc.todo: - from metagpt.tools.swe_agent_commands.swe_agent_utils import extract_patch - - try: - logger.info(await self.submit()) - diff_output = await self.terminal.run_command("git diff --cached") - clear_diff = extract_patch(diff_output) - logger.info(f"Diff output: \n{clear_diff}") - if clear_diff: - self.output_diff = clear_diff - except Exception as e: - logger.error(f"Error during submission: {e}") - - async def submit(self): - if "SWE_CMD_WORK_DIR" not in os.environ: - os.environ["SWE_CMD_WORK_DIR"] = str(Config.default().workspace.path) - if os.path.exists(os.environ["SWE_CMD_WORK_DIR"] + "/test.patch"): - await self.terminal.run_command('git apply -R < "$SWE_CMD_WORK_DIR/test.patch"') - cmd = """ - git add -A - echo "<>" - """ - diff_output = await self.terminal.run_command(cmd) - return diff_output - async def write_new_code(self, path: str, instruction: str = "") -> str: """Write a new code file. Args: @@ -180,3 +123,14 @@ class Engineer2(RoleZero): # TODO: Consider adding line no to be ready for editing. return f"The file {path} has been successfully created, with content:\n{code}" + + async def _eval_terminal_run(self, cmd): + """change command pull/push/commit to end.""" + if any([cmd_key_word in cmd for cmd_key_word in ["pull", "push", "commit"]]): + # The Engineer2 attempts to submit the repository after fixing the bug, thereby reaching the end of the fixing process. + logger.info("Engineer2 use cmd:{cmd}\nCurrent test case is finished.") + # Set self.rc.todo to None to stop the engineer. + self._set_state(-1) + else: + command_output = await self.terminal.run_command(cmd) + return command_output diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 0e8d005e7..9d309bfc6 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -73,6 +73,7 @@ class RoleZero(Role): tool_recommender: Optional[ToolRecommender] = None tool_execution_map: Annotated[dict[str, Callable], Field(exclude=True)] = {} special_tool_commands: list[str] = ["Plan.finish_current_task", "end", "Bash.run"] + # List of exclusive tool commands exclusive_tool_commands: list[str] = [ "Editor.edit_file_by_replace", "Editor.insert_content_at_line", @@ -420,11 +421,13 @@ class RoleZero(Role): # Set the exclusive command flag to False. command_flag = [command["command_name"] not in self.exclusive_tool_commands for command in commands] if command_flag.count(False) > 1: - # Set the flag of the first exclusive command to True. + # Keep only the first exclusive command index_of_first_exclusive = command_flag.index(False) - command_flag[index_of_first_exclusive] = True - # Select command which flag is True. - commands = [commands[index] for index, flag in enumerate(command_flag) if flag is True] + commands = [ + cmd + for index, cmd in enumerate(commands) + if index == index_of_first_exclusive or cmd["command_name"] not in self.exclusive_tool_commands + ] command_rsp = "```json\n" + json.dumps(commands, indent=4, ensure_ascii=False) + "\n```json" logger.info( "exclusive command more than one in current command list. change the command list.\n" + command_rsp @@ -436,14 +439,14 @@ class RoleZero(Role): for cmd in commands: output = f"Command {cmd['command_name']} executed" # handle special command first - try: - if self._is_special_command(cmd): - special_command_output = await self._run_special_command(cmd) - outputs.append(output + ":" + special_command_output) - continue - # run command as specified by tool_execute_map - if cmd["command_name"] in self.tool_execution_map: - tool_obj = self.tool_execution_map[cmd["command_name"]] + if self._is_special_command(cmd): + special_command_output = await self._run_special_command(cmd) + outputs.append(output + ":" + special_command_output) + continue + # run command as specified by tool_execute_map + if cmd["command_name"] in self.tool_execution_map: + tool_obj = self.tool_execution_map[cmd["command_name"]] + try: if inspect.iscoroutinefunction(tool_obj): tool_output = await tool_obj(**cmd["args"]) else: @@ -451,14 +454,14 @@ class RoleZero(Role): if tool_output: output += f": {str(tool_output)}" outputs.append(output) - else: - outputs.append(f"Command {cmd['command_name']} not found.") - break - except Exception as e: - tb = traceback.format_exc() - logger.exception(str(e) + tb) - outputs.append(output + f": {tb}") - break # Stop executing if any command fails + except Exception as e: + tb = traceback.format_exc() + logger.exception(str(e) + tb) + outputs.append(output + f": {tb}") + break # Stop executing if any command fails + else: + outputs.append(f"Command {cmd['command_name']} not found.") + break outputs = "\n\n".join(outputs) return outputs @@ -536,11 +539,8 @@ class RoleZero(Role): from metagpt.environment.mgx.mgx_env import MGXEnv # avoid circular import if not isinstance(self.rc.env, MGXEnv): - rsp = "Not in MGXEnv, command will not be executed." - else: - rsp = await self.rc.env.reply_to_human(content, sent_from=self) - rsp += " If you no longer need to take action, use the command ‘end’ to stop." - return rsp + return "Not in MGXEnv, command will not be executed." + return await self.rc.env.reply_to_human(content, sent_from=self) async def _end(self, **kwarg): self._set_state(-1) diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index 65fb0c53a..4c649f0d2 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -958,7 +958,7 @@ I have opened the openai_api.py file. However, the range of lines shown is from ## example 9 I've found the bug and will start fixing it. I'll pay close attention to the indentation. Since I only need to modify a few lines in this file, I will use Editor.edit_file_by_replace. The original content will be replaced by the new code. -Note that the edit command must be executed in a single response, so this step will only involve using the edit command. +Editor tool is exclusive. If I use this tool, I cannot use any other commands in the current response. ```json [ { @@ -974,7 +974,7 @@ Note that the edit command must be executed in a single response, so this step w ## example 10 I only need to add a few lines to the file, so I will use Editor.insert_content_at_line. The new code will not cover the original code. -Note that the edit command must be executed in a single response, so this step will only involve using the edit command. +Note that the Editor command must be executed in a single response, so this step will only involve using the Editor command. ```json [ { diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index e7fc5f0a1..173322a66 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -24,10 +24,47 @@ from metagpt.utils.repo_to_markdown import is_text_file from metagpt.utils.report import EditorReporter # This is also used in unit tests! -MSG_FILE_UPDATED = "[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]" LINTER_ERROR_MSG = "[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n" +INDENTATION_INFO = """ +The previous line is: +"{pre_line}" +The indentation has {pre_line_indent} spaces. + +The error line is: +"{insert_line}" +The indentation has {insert_line_indent} spaces. + +Please check the indentation of the code to ensure that it is not causing any errors. +Try using indentation with either {sub_4_space} or {add_4_space} spaces. +""" + +ERROR_GUIDANCE = """ +{linter_error_msg} + +[This is how your edit would have looked if applied] +------------------------------------------------- +{window_after_applied} +------------------------------------------------- + +[This is the original code before your edit] +------------------------------------------------- +{window_before_applied} +------------------------------------------------- + +Your changes have NOT been applied. Please fix your edit command and try again +{guidance_message} + +""" + +SUCCESS_EDITE_INFO = """ +[File: {file_name} ({n_total_lines} lines total after edit)] +{window_after_applied} +[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] +""" + + class FileBlock(BaseModel): """A block of content in a file""" @@ -499,15 +536,24 @@ class Editor(BaseModel): content = "".join(new_lines) return content, n_added_lines - def get_indentation_infromation(self, content, first_error_line): + def _get_indentation_info(self, content, first_error_line): + """ + Information about the current edit's indentation. + Includes guidance on how to fix it. + """ content_lines = content.split("\n") - previous_line = content_lines[first_error_line - 2] if first_error_line - 2 >= 0 else "" - first_insert_line = content_lines[first_error_line - 1] - ret_str = f'the privous line is "{previous_line}", the indentation has {len(previous_line)-len(previous_line.lstrip())} space\n' - insert_line_indentation = len(first_insert_line) - len(first_insert_line.lstrip()) - ret_str += f'the error line is "{first_insert_line}", the indentation has {insert_line_indentation} space\n' - ret_str += "Please check the indentation of the code to ensure that it is not causing any errors.\n" - ret_str += f"Try to use indentation that has {insert_line_indentation-4 if insert_line_indentation-4 >0 else 0} or {insert_line_indentation+4} space" + pre_line = content_lines[first_error_line - 2] if first_error_line - 2 >= 0 else "" + pre_line_indent = len(pre_line) - len(pre_line.lstrip()) + insert_line = content_lines[first_error_line - 1] + insert_line_indent = len(insert_line) - len(insert_line.lstrip()) + ret_str = INDENTATION_INFO.format( + pre_line=pre_line, + pre_line_indent=pre_line_indent, + insert_line=insert_line, + insert_line_indent=insert_line_indent, + sub_4_space=max(insert_line_indent - 4, 0), + add_4_space=insert_line_indent + 4, + ) return ret_str def _edit_file_impl( @@ -529,7 +575,6 @@ class Editor(BaseModel): is_insert: bool = False: Whether to insert content at the given line number instead of editing. is_append: bool = False: Whether to append content to the file instead of editing. """ - ret_str = "" ERROR_MSG = f"[Error editing file {file_name}. Please confirm the file is correct.]" ERROR_MSG_SUFFIX = ( @@ -579,14 +624,12 @@ class Editor(BaseModel): try: content, n_added_lines = self._insert_impl(lines, start, content) except LineNumberError as e: - ret_str += (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n" - return ret_str + return (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n" else: try: content, n_added_lines = self._edit_impl(lines, start, end, content) except LineNumberError as e: - ret_str += (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n" - return ret_str + return (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n" if not content.endswith("\n"): content += "\n" @@ -646,66 +689,52 @@ class Editor(BaseModel): else: raise ValueError("Invalid state. This should never happen.") - ret_str += LINTER_ERROR_MSG - ret_str += lint_error + "\n" - - editor_lines = n_added_lines + 20 - - ret_str += "[This is how your edit would have looked if applied]\n" - ret_str += "-------------------------------------------------\n" - ret_str += self._print_window(file_name, show_line, editor_lines) + "\n" - ret_str += "-------------------------------------------------\n\n" - - ret_str += "[This is the original code before your edit]\n" - ret_str += "-------------------------------------------------\n" - ret_str += ( - self._print_window( - original_file_backup_path, - show_line, - editor_lines, - ) - + "\n" - ) - ret_str += "-------------------------------------------------\n" - - ret_str += self.get_indentation_infromation(content, start or len(lines)) - - ret_str += ( - "Your changes have NOT been applied. Please fix your edit command and try again.\n" + guidance_message = self._get_indentation_info(content, start or len(lines)) + guidance_message += ( "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n" "DO NOT re-run the same failed edit command. Running it again will lead to the same error." ) + lint_error_info = ERROR_GUIDANCE.format( + linter_error_msg=LINTER_ERROR_MSG + lint_error, + window_after_applied=self._print_window(file_name, show_line, n_added_lines + 20), + window_before_applied=self._print_window( + original_file_backup_path, show_line, n_added_lines + 20 + ), + guidance_message=guidance_message, + ).strip() # recover the original file with original_file_backup_path.open() as fin, file_name.open("w") as fout: fout.write(fin.read()) original_file_backup_path.unlink() - return ret_str + return lint_error_info except FileNotFoundError as e: - ret_str += f"File not found: {e}\n" + return f"File not found: {e}\n" except IOError as e: - ret_str += f"An error occurred while handling the file: {e}\n" + return f"An error occurred while handling the file: {e}\n" except ValueError as e: - ret_str += f"Invalid input: {e}\n" + return f"Invalid input: {e}\n" except Exception as e: - error_str = "[This is how your edit would have looked if applied]\n" - error_str += "-------------------------------------------------\n" - error_str += self._print_window(file_name, start or len(lines), 40) + "\n" - error_str += "-------------------------------------------------\n" - error_str += self.get_indentation_infromation(content, start or len(lines)) - if not is_insert and not is_append: - error_str += "enlarge the range of original code." - error_str += "\nTry to enlarge the range of the orginal code" + guidance_message = self._get_indentation_info(content, start or len(lines)) + guidance_message += ( + "You either need to 1) Specify the correct start/end line arguments or 2) Enlarge the range of original code.\n" + "DO NOT re-run the same failed edit command. Running it again will lead to the same error." + ) + error_info = ERROR_GUIDANCE.format( + linter_error_msg=LINTER_ERROR_MSG + str(e), + window_after_applied=self._print_window(file_name, start or len(lines), 40), + window_before_applied=self._print_window(original_file_backup_path, start or len(lines), 40), + guidance_message=guidance_message, + ).strip() # Clean up the temporary file if an error occurs with original_file_backup_path.open() as fin, file_name.open("w") as fout: fout.write(fin.read()) if temp_file_path and Path(temp_file_path).exists(): Path(temp_file_path).unlink() - logger.warning(f"An unexpected error occurred: {e}") - raise Exception(f"{error_str}") from e - # raise e + # logger.warning(f"An unexpected error occurred: {e}") + raise Exception(f"{error_info}") from e # Update the file information and print the updated content with file_name.open("r", encoding="utf-8") as file: @@ -717,11 +746,13 @@ class Editor(BaseModel): self.current_line = max(1, len(lines)) # end of original file else: self.current_line = start or n_total_lines or 1 - ret_str += f"[File: {file_name.resolve()} ({n_total_lines} lines total after edit)]\n" - CURRENT_FILE = file_name - ret_str += self._print_window(CURRENT_FILE, self.current_line, self.window) + "\n" - ret_str += MSG_FILE_UPDATED.format(line_number=self.current_line) - return ret_str + cuccess_edit_info = SUCCESS_EDITE_INFO.format( + file_name=file_name.resolve(), + n_total_lines=n_total_lines, + window_after_applied=self._print_window(file_name, self.current_line, self.window), + line_number=self.current_line, + ).strip() + return cuccess_edit_info def edit_file_by_replace(self, file_name: str, to_replace: str, new_content: str) -> str: """Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`. diff --git a/tests/metagpt/environment/mgx_env/run_mgx_env.py b/tests/metagpt/environment/mgx_env/run_mgx_env.py index b495d376a..f0f561774 100644 --- a/tests/metagpt/environment/mgx_env/run_mgx_env.py +++ b/tests/metagpt/environment/mgx_env/run_mgx_env.py @@ -8,7 +8,6 @@ from metagpt.environment.mgx.mgx_env import MGXEnv from metagpt.roles import Architect, Engineer, ProductManager, ProjectManager from metagpt.roles.di.data_analyst import DataAnalyst from metagpt.roles.di.engineer2 import Engineer2 -from metagpt.roles.di.swe_agent import SWEAgent from metagpt.roles.di.team_leader import TeamLeader from metagpt.schema import Message @@ -29,7 +28,6 @@ async def main(requirement="", enable_human_input=False, use_fixed_sop=False, al engineer, # QaEngineer(), DataAnalyst(), - SWEAgent(), ] ) diff --git a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py index 86258edbc..1e6d94d4e 100644 --- a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py +++ b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py @@ -1,9 +1,11 @@ +import argparse import asyncio import json import os import shutil import sys from datetime import datetime +from pathlib import Path from metagpt.config2 import Config from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT @@ -16,8 +18,7 @@ from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset config = Config.default() # Specify by yourself Role = Engineer2 -# 调整每个样例的执行时间,太低容易出现提交u数量少的情况 -MAX_MINUTES_PRE_INSTANCE = 20 +global_terminal = Terminal() TEST_REPO_DIR = METAGPT_ROOT / "data" / "test_repo" DATA_DIR = METAGPT_ROOT / "data/hugging_face" @@ -58,29 +59,53 @@ def check_instance_status(instance, swe_result_dir): return True -async def run(instance, swe_result_dir): - if not check_instance_status(instance, swe_result_dir): - logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.") - return +async def terminal_run_command(cmd): + cmd_output = await global_terminal.run_command(cmd) + logger.info(f"command:{cmd} output:\n {cmd_output}") + return cmd_output - repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]) - # 下载仓库 - logger.info(f"repo_path:{repo_path}") - if os.path.exists(repo_path): - # 删除已有的仓库 - logger.info(f"remove exist repo path:{repo_path}") - shutil.rmtree(repo_path) - # 下载仓库 并切换分支 - terminal = Terminal() + +async def refresh_repo(instance, test_repo_dir): + repo_path = Path(test_repo_dir) / ( + instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"] + ) repo_identifier = instance["repo"] base_commit = instance["base_commit"] clone_command = f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}" checkout_command = f"cd {repo_path} && git checkout -f {base_commit}" if base_commit else "" - await terminal.run_command(clone_command) - ignore_temp_file_cmd = "echo '.backup.*' >> .gitignore" - logger.info(await terminal.run_command(checkout_command)) - logger.info(await terminal.run_command("git branch")) - await terminal.run_command(ignore_temp_file_cmd) + + if os.path.exists(repo_path): + # 删除已有的仓库 + logger.info(f"remove exist repo path:{repo_path}") + shutil.rmtree(repo_path) + + await terminal_run_command(clone_command) + await terminal_run_command(checkout_command) + await terminal_run_command("git branch") + await terminal_run_command("echo '.backup.*' >> .gitignore") + + return repo_path + + +async def get_git_diff(): + git_diff = "" + try: + await terminal_run_command("git add -A") + git_diff = await terminal_run_command("git diff --cached") + except Exception as e: + logger.error(f"Error during submission: {e}") + return git_diff + + +async def run(instance, swe_result_dir, args): + if not check_instance_status(instance, swe_result_dir) and not args.cover: + logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.") + return + + # preparation for the repo + logger.info(f"**** Preparing to run {instance['instance_id']}****") + test_repo_dir = args.test_repo_dir + repo_path = await refresh_repo(instance, test_repo_dir) user_requirement_and_issue = INSTANCE_TEMPLATE.format( issue=instance["problem_statement"], @@ -94,21 +119,21 @@ async def run(instance, swe_result_dir): logger.info("User Requirement", user_requirement_and_issue) try: role = Role(run_eval=True, editor=Editor(enable_auto_lint=True)) - await asyncio.wait_for(role.run(user_requirement_and_issue), timeout=MAX_MINUTES_PRE_INSTANCE * 60) + await asyncio.wait_for(role.run(user_requirement_and_issue), timeout=args.max_wait_time_per_case * 60) except Exception as e: print(e) logger.info(f"**** exception lead to end: {instance['instance_id']}****") pass - - save_predictions(role, instance, swe_result_dir) + # save the difference of repo + await save_predictions(role, instance, swe_result_dir) logger.info(f"**** Finished running {instance['instance_id']}****") -def save_predictions(role, instance, swe_result_dir): +async def save_predictions(role, instance, swe_result_dir): output_file = swe_result_dir / "all_preds.jsonl" instance["model_name_or_path"] = role.config.llm.model - instance["model_patch"] = role.output_diff - logger.info("model_patch:" + role.output_diff) + instance["model_patch"] = await get_git_diff() + logger.info(f"{instance['model_patch']=}") logger.info(f"Preparing to save predictions to {output_file}") # Save the predictions to a JSONL file @@ -118,31 +143,63 @@ def save_predictions(role, instance, swe_result_dir): logger.info(f"Saved prediction of {instance['instance_id']} to {output_file}") -async def async_main(): +async def async_main(args): dataset_path = "manna-ai/SWE-bench_Nano" # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano" - dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test") - date_time = datetime.now().strftime("%m%d") - _round = "first" + swe_result_dir = Path(args.save_folder) + if swe_result_dir.exists(): + if args.cover: + logger.info(f"{swe_result_dir} exists and original result remove") + shutil.rmtree(swe_result_dir.absolute()) + else: + logger.info(f"{swe_result_dir} exists and continue test") - exp_name = f"nano_mgx_{date_time}_{_round}" - - now = datetime.now() - formatted_time = now.strftime("%Y_%m_%d_%H_%M_%S") - swe_result_dir = ( - DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}_start_time_{formatted_time}" / exp_name - ) - # swe_result_dir = ( - # DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}" / exp_name - # ) swe_result_dir.mkdir(parents=True, exist_ok=True) for index, instance in enumerate(dataset): # switch to a new logger file + if index < args.ignore_first_n: + continue logger.remove() logger.add(sys.stderr, level="INFO") - logger.add(swe_result_dir / f"{index+1}_{instance['instance_id']}.log", level="DEBUG") - await run(instance, swe_result_dir) + logger.add(swe_result_dir / "logs" / f"{index+1}_{instance['instance_id']}.log", level="DEBUG") + await run(instance, swe_result_dir, args) if __name__ == "__main__": - asyncio.run(async_main()) + parser = argparse.ArgumentParser(description="the argument of scripts") + # 添加参数 + swe_result_dir = ( + DEFAULT_WORKSPACE_ROOT + / f"result_{config.llm.model.replace('/', '_')}_start_time_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S') }" + ) + test_repo_dir = TEST_REPO_DIR.absolute() + swe_result_dir = swe_result_dir.absolute() + parser.add_argument( + "-rw", "--test_repo_dir", default=test_repo_dir, help="The directory to save temporary repositories", type=str + ) + parser.add_argument("-s", "--save_folder", default=swe_result_dir, help="Folder to save results and logs", type=str) + parser.add_argument( + "-mwtc", "--max_wait_time_per_case", help="Maximum wait time allowed per test case (in minutes)", type=int + ) + parser.add_argument("-n", "--ignore_first_n", default=0, help="Cover the original flag", type=int) + parser.add_argument("-c", "--cover", default=False, help="Cover the original flag", type=bool) + # 解析命令行参数 + args = parser.parse_args() + asyncio.run(async_main(args)) + + +""" +python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \ +--test_repo_dir "./data/test_repo" \ +--save_folder "./workspace/deepseek_coder_test1" \ +--max_wait_time_per_case 10 +""" + +""" +Cover Mode: +python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \ +--test_repo_dir "./data/test_repo" \ +--save_folder "./workspace/deepseek_coder_test1" \ +--max_wait_time_per_case 10 \ +--cover +"""