From 4063186836f3661f5f34397698769b451c5f5262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Fri, 6 Sep 2024 12:04:40 +0800 Subject: [PATCH] update run_swe_bechmark script --- metagpt/roles/di/engineer2.py | 30 ++++---- metagpt/tools/libs/editor.py | 21 +++--- .../roles/di/run_swe_agent_for_benchmark.py | 70 ++++++++++--------- 3 files changed, 65 insertions(+), 56 deletions(-) diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index 5b264e85e..2310650b3 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -68,26 +68,31 @@ class Engineer2(RoleZero): def _update_tool_execution(self): # validate = ValidateAndRewriteCode() cr = CodeReview() - self.tool_execution_map.update( - { - "Terminal.run_command": self.terminal.run_command, - "git_create_pull": git_create_pull, - "Engineer2.write_new_code": self.write_new_code, - "CodeReview.review": cr.review, - "CodeReview.fix": cr.fix, - # "ValidateAndRewriteCode.run": validate.run, - # "ValidateAndRewriteCode": validate.run, - } - ) self.exclusive_tool_commands.append("Engineer2.write_new_code") - if self.run_eval: + if self.run_eval is True: + # Evalute tool map self.tool_execution_map.update( { + "git_create_pull": git_create_pull, + "Engineer2.write_new_code": self.write_new_code, + "CodeReview.review": cr.review, + "CodeReview.fix": cr.fix, "Terminal.run_command": self._eval_terminal_run, "RoleZero.ask_human": self._end, "RoleZero.reply_to_human": self._end, } ) + else: + # Default tool map + self.tool_execution_map.update( + { + "git_create_pull": git_create_pull, + "Engineer2.write_new_code": self.write_new_code, + "CodeReview.review": cr.review, + "CodeReview.fix": cr.fix, + "Terminal.run_command": self.terminal.run_command, + } + ) async def _act(self) -> Message: message = await super()._act() @@ -108,6 +113,7 @@ class Engineer2(RoleZero): async def write_new_code(self, path: str, instruction: str = "") -> str: """Write a new code file. + Args: path (str): The absolute path of the file to be created. instruction (optional, str): Further hints or notice other than the current task instruction, must be very concise and can be empty. Defaults to "". diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index d35e97d07..d35fb124a 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -537,15 +537,14 @@ class Editor(BaseModel): content = "".join(new_lines) return content, n_added_lines - def _get_indentation_info(self, content, first_error_line): + def _get_indentation_info(self, content, first_line): """ - Information about the current edit's indentation. - Includes guidance on how to fix it. + The indentation of the first insert line and the previous line, along with guidance for the next attempt. """ content_lines = content.split("\n") - pre_line = content_lines[first_error_line - 2] if first_error_line - 2 >= 0 else "" + pre_line = content_lines[first_line - 2] if first_line - 2 >= 0 else "" pre_line_indent = len(pre_line) - len(pre_line.lstrip()) - insert_line = content_lines[first_error_line - 1] + insert_line = content_lines[first_line - 1] insert_line_indent = len(insert_line) - len(insert_line.lstrip()) ret_str = INDENTATION_INFO.format( pre_line=pre_line, @@ -802,8 +801,8 @@ class Editor(BaseModel): new_content: str: The new content to replace the old content with. NOTE: - This tool is exclusive. If you use this tool, you cannot use any other commands in the current response. - If you need to use it multiple times, wait for the next turn. + This tool is exclusive. If you use this tool, you cannot use any other commands in the current response. + If you need to use it multiple times, wait for the next turn. """ # FIXME: support replacing *all* occurrences if to_replace.strip() == "": @@ -881,8 +880,8 @@ class Editor(BaseModel): line_number: int: The line number (starting from 1) to insert the content after. content: str: The content to insert. NOTE: - This tool is exclusive. If you use this tool, you cannot use any other commands in the current response. - If you need to use it multiple times, wait for the next turn. + This tool is exclusive. If you use this tool, you cannot use any other commands in the current response. + If you need to use it multiple times, wait for the next turn. """ file_name = self._try_fix_path(file_name) @@ -904,8 +903,8 @@ class Editor(BaseModel): file_name: str: The name of the file to edit. content: str: The content to insert. NOTE: - This tool is exclusive. If you use this tool, you cannot use any other commands in the current response. - If you need to use it multiple times, wait for the next turn. + This tool is exclusive. If you use this tool, you cannot use any other commands in the current response. + If you need to use it multiple times, wait for the next turn. """ file_name = self._try_fix_path(file_name) diff --git a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py index 2d9617442..4071bcf8e 100644 --- a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py +++ b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py @@ -64,23 +64,31 @@ async def terminal_run_command(cmd): return cmd_output -async def refresh_repo(instance, test_repo_dir): +async def refresh_repo(instance, test_repo_dir, reclone_existing_repo=False): repo_path = Path(test_repo_dir) / ( instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"] ) repo_identifier = instance["repo"] base_commit = instance["base_commit"] - clone_command = f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}" - checkout_command = f"cd {repo_path} && git checkout -f {base_commit}" if base_commit else "" - - if os.path.exists(repo_path): - # 删除已有的仓库 + if os.path.exists(repo_path) and reclone_existing_repo is True: logger.info(f"remove exist repo path:{repo_path}") shutil.rmtree(repo_path) - await terminal_run_command(clone_command) - await terminal_run_command(checkout_command) + if os.path.exists(repo_path): + logger.info(f"reset exist repo path:{repo_path}") + await terminal_run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d") + await terminal_run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')") + await terminal_run_command("echo $BRANCH") + await terminal_run_command('git checkout "$BRANCH"') + else: + logger.info(f"clone repo to path:{repo_path}") + clone_command = f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}" + checkout_command = f"cd {repo_path} " + "&& git checkout -f {base_commit}" if base_commit else "" + await terminal_run_command(clone_command) + await terminal_run_command(checkout_command) + await terminal_run_command("git branch") + # ignore backup file await terminal_run_command("echo '.backup.*' >> .gitignore") return repo_path @@ -97,14 +105,14 @@ async def get_git_diff(): async def run(instance, swe_result_dir, args): - if not check_instance_status(instance, swe_result_dir) and not args.cover: + if not check_instance_status(instance, swe_result_dir): logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.") return # preparation for the repo logger.info(f"**** Preparing to run {instance['instance_id']}****") test_repo_dir = args.test_repo_dir - repo_path = await refresh_repo(instance, test_repo_dir) + repo_path = await refresh_repo(instance, test_repo_dir, args.reclone_existing_repo) user_requirement_and_issue = INSTANCE_TEMPLATE.format( issue=instance["problem_statement"], @@ -117,22 +125,20 @@ async def run(instance, swe_result_dir, args): logger.info(f"**** Starting to run {instance['instance_id']}****") logger.info("User Requirement", user_requirement_and_issue) try: - role = Engineer2(run_eval=True, editor=Editor(enable_auto_lint=True)) - await asyncio.wait_for(role.run(user_requirement_and_issue), timeout=args.max_wait_time_per_case * 60) + engineer = Engineer2(run_eval=True, editor=Editor(enable_auto_lint=True)) + await asyncio.wait_for(engineer.run(user_requirement_and_issue), timeout=args.max_wait_time_per_case * 60) except Exception as e: - print(e) - logger.info(f"**** exception lead to end: {instance['instance_id']}****") - pass + logger.warning(f"**** exception lead to end: {instance['instance_id']}****\n\nerror:{e}") # save the difference of repo - await save_predictions(role, instance, swe_result_dir) + await save_predictions(engineer, instance, swe_result_dir) logger.info(f"**** Finished running {instance['instance_id']}****") -async def save_predictions(role, instance, swe_result_dir): +async def save_predictions(engineer, instance, swe_result_dir): output_file = swe_result_dir / "all_preds.jsonl" - instance["model_name_or_path"] = role.config.llm.model + instance["model_name_or_path"] = engineer.config.llm.model instance["model_patch"] = await get_git_diff() - logger.info(f"{instance['model_patch']=}") + logger.info(f"'model_patch':\n{instance['model_patch']}") logger.info(f"Preparing to save predictions to {output_file}") # Save the predictions to a JSONL file @@ -147,16 +153,9 @@ async def async_main(args): dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test") swe_result_dir = Path(args.save_folder) if swe_result_dir.exists(): - if args.cover: - logger.info(f"{swe_result_dir} exists and original result remove") - shutil.rmtree(swe_result_dir.absolute()) - else: - logger.info(f"{swe_result_dir} exists and continue test") - + logger.info(f"{swe_result_dir} exists; resuming test from last checkpoint.") swe_result_dir.mkdir(parents=True, exist_ok=True) for index, instance in enumerate(dataset): - if index < args.ignore_first_n: - continue # switch to a new logger file logger.remove() logger.add(sys.stderr, level="INFO") @@ -180,25 +179,30 @@ if __name__ == "__main__": parser.add_argument( "-mwtc", "--max_wait_time_per_case", help="Maximum wait time allowed per test case (in minutes)", type=int ) - parser.add_argument("-n", "--ignore_first_n", default=0, help="Cover the original flag", type=int) - parser.add_argument("-c", "--cover", default=False, help="Cover the original flag", type=bool) + parser.add_argument( + "-o", + "--reclone_existing_repo", + action="store_true", + help="If set, the existing repository will be removed and recloned.", + ) # 解析命令行参数 args = parser.parse_args() asyncio.run(async_main(args)) """ +# python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \ --test_repo_dir "./data/test_repo" \ ---save_folder "./workspace/deepseek_coder_test1" \ +--save_folder "./workspace/deepseek_coder_0907" \ --max_wait_time_per_case 10 """ """ -Cover Mode: +# 重新克隆仓库 python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \ --test_repo_dir "./data/test_repo" \ ---save_folder "./workspace/deepseek_coder_test1" \ +--save_folder "./workspace/deepseek_coder_0907" \ --max_wait_time_per_case 10 \ ---cover +--reclone_existing_repo """