From 4063186836f3661f5f34397698769b451c5f5262 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= <huangweitao@fuzhi.ai>
Date: Fri, 6 Sep 2024 12:04:40 +0800
Subject: [PATCH] update run_swe_benchmark script

---
 metagpt/roles/di/engineer2.py                 | 30 ++++----
 metagpt/tools/libs/editor.py                  | 21 +++---
 .../roles/di/run_swe_agent_for_benchmark.py   | 70 ++++++++++---------
 3 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py
index 5b264e85e..2310650b3 100644
--- a/metagpt/roles/di/engineer2.py
+++ b/metagpt/roles/di/engineer2.py
@@ -68,26 +68,31 @@ class Engineer2(RoleZero):
     def _update_tool_execution(self):
         # validate = ValidateAndRewriteCode()
         cr = CodeReview()
-        self.tool_execution_map.update(
-            {
-                "Terminal.run_command": self.terminal.run_command,
-                "git_create_pull": git_create_pull,
-                "Engineer2.write_new_code": self.write_new_code,
-                "CodeReview.review": cr.review,
-                "CodeReview.fix": cr.fix,
-                # "ValidateAndRewriteCode.run": validate.run,
-                # "ValidateAndRewriteCode": validate.run,
-            }
-        )
         self.exclusive_tool_commands.append("Engineer2.write_new_code")
-        if self.run_eval:
+        if self.run_eval is True:
+            # Evaluate tool map
             self.tool_execution_map.update(
                 {
+                    "git_create_pull": git_create_pull,
+                    "Engineer2.write_new_code": self.write_new_code,
+                    "CodeReview.review": cr.review,
+                    "CodeReview.fix": cr.fix,
                     "Terminal.run_command": self._eval_terminal_run,
                     "RoleZero.ask_human": self._end,
                     "RoleZero.reply_to_human": self._end,
                 }
             )
+        else:
+            # Default tool map
+            self.tool_execution_map.update(
+                {
+                    "git_create_pull": git_create_pull,
+                    "Engineer2.write_new_code": self.write_new_code,
+                    "CodeReview.review": cr.review,
+                    "CodeReview.fix": cr.fix,
+                    "Terminal.run_command": self.terminal.run_command,
+                }
+            )
 
     async def _act(self) -> Message:
         message = await super()._act()
@@ -108,6 +113,7 @@ class Engineer2(RoleZero):
 
     async def write_new_code(self, path: str, instruction: str = "") -> str:
         """Write a new code file.
+
         Args:
             path (str): The absolute path of the file to be created.
             instruction (optional, str): Further hints or notice other than the current task instruction, must be very concise and can be empty. Defaults to "".
diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py
index d35e97d07..d35fb124a 100644
--- a/metagpt/tools/libs/editor.py
+++ b/metagpt/tools/libs/editor.py
@@ -537,15 +537,14 @@ class Editor(BaseModel):
         content = "".join(new_lines)
         return content, n_added_lines
 
-    def _get_indentation_info(self, content, first_error_line):
+    def _get_indentation_info(self, content, first_line):
         """
-        Information about the current edit's indentation.
-        Includes guidance on how to fix it.
+        The indentation of the first insert line and the previous line, along with guidance for the next attempt.
         """
         content_lines = content.split("\n")
-        pre_line = content_lines[first_error_line - 2] if first_error_line - 2 >= 0 else ""
+        pre_line = content_lines[first_line - 2] if first_line - 2 >= 0 else ""
         pre_line_indent = len(pre_line) - len(pre_line.lstrip())
-        insert_line = content_lines[first_error_line - 1]
+        insert_line = content_lines[first_line - 1]
         insert_line_indent = len(insert_line) - len(insert_line.lstrip())
         ret_str = INDENTATION_INFO.format(
             pre_line=pre_line,
@@ -802,8 +801,8 @@ class Editor(BaseModel):
             new_content: str: The new content to replace the old content with.
 
         NOTE:
-        This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
-        If you need to use it multiple times, wait for the next turn.
+            This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
+            If you need to use it multiple times, wait for the next turn.
         """
         # FIXME: support replacing *all* occurrences
         if to_replace.strip() == "":
@@ -881,8 +880,8 @@ class Editor(BaseModel):
             line_number: int: The line number (starting from 1) to insert the content after.
             content: str: The content to insert.
         NOTE:
-        This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
-        If you need to use it multiple times, wait for the next turn.
+            This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
+            If you need to use it multiple times, wait for the next turn.
         """
         file_name = self._try_fix_path(file_name)
 
@@ -904,8 +903,8 @@ class Editor(BaseModel):
             file_name: str: The name of the file to edit.
             content: str: The content to insert.
         NOTE:
-        This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
-        If you need to use it multiple times, wait for the next turn.
+            This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
+            If you need to use it multiple times, wait for the next turn.
         """
         file_name = self._try_fix_path(file_name)
 
diff --git a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py
index 2d9617442..4071bcf8e 100644
--- a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py
+++ b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py
@@ -64,23 +64,31 @@ async def terminal_run_command(cmd):
     return cmd_output
 
 
-async def refresh_repo(instance, test_repo_dir):
+async def refresh_repo(instance, test_repo_dir, reclone_existing_repo=False):
     repo_path = Path(test_repo_dir) / (
         instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]
     )
     repo_identifier = instance["repo"]
     base_commit = instance["base_commit"]
-    clone_command = f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}"
-    checkout_command = f"cd {repo_path} && git checkout -f {base_commit}" if base_commit else ""
-
-    if os.path.exists(repo_path):
-        # 删除已有的仓库
+    if os.path.exists(repo_path) and reclone_existing_repo is True:
         logger.info(f"remove exist repo path:{repo_path}")
         shutil.rmtree(repo_path)
 
-    await terminal_run_command(clone_command)
-    await terminal_run_command(checkout_command)
+    if os.path.exists(repo_path):
+        logger.info(f"reset exist repo path:{repo_path}")
+        await terminal_run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d")
+        await terminal_run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')")
+        await terminal_run_command("echo $BRANCH")
+        await terminal_run_command('git checkout "$BRANCH"')
+    else:
+        logger.info(f"clone repo to path:{repo_path}")
+        clone_command = f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}"
+        checkout_command = f"cd {repo_path} && git checkout -f {base_commit}" if base_commit else ""
+        await terminal_run_command(clone_command)
+        await terminal_run_command(checkout_command)
+
     await terminal_run_command("git branch")
+    # ignore backup file
     await terminal_run_command("echo '.backup.*' >> .gitignore")
 
     return repo_path
@@ -97,14 +105,14 @@ async def get_git_diff():
 
 
 async def run(instance, swe_result_dir, args):
-    if not check_instance_status(instance, swe_result_dir) and not args.cover:
+    if not check_instance_status(instance, swe_result_dir):
         logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.")
         return
 
     # preparation for the repo
     logger.info(f"**** Preparing to run {instance['instance_id']}****")
     test_repo_dir = args.test_repo_dir
-    repo_path = await refresh_repo(instance, test_repo_dir)
+    repo_path = await refresh_repo(instance, test_repo_dir, args.reclone_existing_repo)
 
     user_requirement_and_issue = INSTANCE_TEMPLATE.format(
         issue=instance["problem_statement"],
@@ -117,22 +125,20 @@ async def run(instance, swe_result_dir, args):
     logger.info(f"**** Starting to run {instance['instance_id']}****")
     logger.info("User Requirement", user_requirement_and_issue)
     try:
-        role = Engineer2(run_eval=True, editor=Editor(enable_auto_lint=True))
-        await asyncio.wait_for(role.run(user_requirement_and_issue), timeout=args.max_wait_time_per_case * 60)
+        engineer = Engineer2(run_eval=True, editor=Editor(enable_auto_lint=True))
+        await asyncio.wait_for(engineer.run(user_requirement_and_issue), timeout=args.max_wait_time_per_case * 60)
     except Exception as e:
-        print(e)
-        logger.info(f"**** exception lead to end: {instance['instance_id']}****")
-        pass
+        logger.warning(f"**** exception lead to end: {instance['instance_id']}****\n\nerror:{e}")
     # save the difference of repo
-    await save_predictions(role, instance, swe_result_dir)
+    await save_predictions(engineer, instance, swe_result_dir)
     logger.info(f"**** Finished running {instance['instance_id']}****")
 
 
-async def save_predictions(role, instance, swe_result_dir):
+async def save_predictions(engineer, instance, swe_result_dir):
     output_file = swe_result_dir / "all_preds.jsonl"
-    instance["model_name_or_path"] = role.config.llm.model
+    instance["model_name_or_path"] = engineer.config.llm.model
     instance["model_patch"] = await get_git_diff()
-    logger.info(f"{instance['model_patch']=}")
+    logger.info(f"'model_patch':\n{instance['model_patch']}")
     logger.info(f"Preparing to save predictions to {output_file}")
 
     # Save the predictions to a JSONL file
@@ -147,16 +153,9 @@ async def async_main(args):
     dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test")
     swe_result_dir = Path(args.save_folder)
     if swe_result_dir.exists():
-        if args.cover:
-            logger.info(f"{swe_result_dir} exists and original result remove")
-            shutil.rmtree(swe_result_dir.absolute())
-        else:
-            logger.info(f"{swe_result_dir} exists and continue test")
-
+        logger.info(f"{swe_result_dir} exists; resuming test from last checkpoint.")
     swe_result_dir.mkdir(parents=True, exist_ok=True)
     for index, instance in enumerate(dataset):
-        if index < args.ignore_first_n:
-            continue
         # switch to a new logger file
         logger.remove()
         logger.add(sys.stderr, level="INFO")
@@ -180,25 +179,30 @@ if __name__ == "__main__":
     parser.add_argument(
         "-mwtc", "--max_wait_time_per_case", help="Maximum wait time allowed per test case (in minutes)", type=int
     )
-    parser.add_argument("-n", "--ignore_first_n", default=0, help="Cover the original flag", type=int)
-    parser.add_argument("-c", "--cover", default=False, help="Cover the original flag", type=bool)
+    parser.add_argument(
+        "-o",
+        "--reclone_existing_repo",
+        action="store_true",
+        help="If set, the existing repository will be removed and recloned.",
+    )
     # 解析命令行参数
     args = parser.parse_args()
     asyncio.run(async_main(args))
 
 
 """
+# Default mode: reuse (reset) an existing repository clone
 python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
 --test_repo_dir "./data/test_repo" \
---save_folder "./workspace/deepseek_coder_test1" \
+--save_folder "./workspace/deepseek_coder_0907" \
 --max_wait_time_per_case 10 
 """
 
 """
-Cover Mode:
+# Re-clone mode: remove the existing repository and clone it again
 python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
 --test_repo_dir "./data/test_repo" \
---save_folder "./workspace/deepseek_coder_test1" \
+--save_folder "./workspace/deepseek_coder_0907" \
 --max_wait_time_per_case 10 \
---cover
+--reclone_existing_repo
 """