From 92f94862cbdb2019fccf7990e6842d3a3066b9ab Mon Sep 17 00:00:00 2001
From: seeker <liushaojie@fuzhi.ai>
Date: Tue, 2 Jul 2024 21:26:21 +0800
Subject: [PATCH] update: swe

---
 metagpt/prompts/di/swe.py                     |  16 +-
 metagpt/roles/di/role_zero.py                 |   7 +-
 metagpt/roles/di/swe.py                       |  23 +-
 metagpt/tools/swe_agent_commands/__init__.py  |   7 +
 metagpt/tools/swe_agent_commands/defaults.sh  |   2 +-
 .../swe_agent_commands/execute_env_utils.py   | 359 ------------------
 tests/metagpt/roles/di/run_swe.py             |  16 +-
 7 files changed, 57 insertions(+), 373 deletions(-)
 create mode 100644 metagpt/tools/swe_agent_commands/__init__.py
 delete mode 100644 metagpt/tools/swe_agent_commands/execute_env_utils.py

diff --git a/metagpt/prompts/di/swe.py b/metagpt/prompts/di/swe.py
index 15e917e11..64c67b09b 100644
--- a/metagpt/prompts/di/swe.py
+++ b/metagpt/prompts/di/swe.py
@@ -1,3 +1,10 @@
+"""
+This code is adapted from the examples provided in the SWE-agent project.
+You can find the original examples from the SWE-agent project here:
+https://github.com/princeton-nlp/SWE-agent/tree/main/config/configs
+"""
+
+
 SWE_AGENT_SYSTEM_TEMPLATE = """
 SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface.
 
@@ -8,9 +15,13 @@ If you'd like to add the line '        print(x)' you must fully write that out,
 
 Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them.
 
-Your output should always contain a section of reasoning and a command described in JSON format. The command must always contain command_name and args fields. The command_name field should always be Bash.run, and the args field should always include a cmd field containing the bash command, as shown in the example below:
-<output_format>
+Your output should always contain a section of reasoning and a command described in JSON format.
+The command must always contain command_name and args fields. The command_name field should always be Bash.run, and the args field should always include a cmd field containing the bash command.
+Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes.
+As shown in the example below:
+
 First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like.
+
 ```json
 {{
     "command_name": "Bash.run",
@@ -18,7 +29,6 @@ First I'll start by using ls to see what files are in the current directory. The
         "cmd": "ls -a" 
     }}
 }}
-</output_format>
 ```
 
 
diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py
index 53d7393d6..c6ed6f6c6 100644
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@@ -4,7 +4,7 @@ import inspect
 import json
 import re
 import traceback
-from typing import Callable, Dict, List, Literal, Tuple, Union
+from typing import Callable, Dict, List, Literal, Tuple
 
 from pydantic import model_validator
 
@@ -167,7 +167,7 @@ class RoleZero(Role):
         if self.use_fixed_sop:
             return await super()._act()
 
-        commands, ok = await self._get_commands()
+        commands, ok = await self._parse_commands()
         if not ok:
             error_msg = commands
             return error_msg
@@ -202,7 +202,7 @@ class RoleZero(Role):
             actions_taken += 1
         return rsp  # return output from the last action
 
-    async def _get_commands(self) -> Tuple[Union[UserMessage, List[Dict]], bool]:
+    async def _parse_commands(self) -> Tuple[List[Dict], bool]:
         """Retrieves commands from the Large Language Model (LLM).
 
         This function attempts to retrieve a list of commands from the LLM by
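@@ -211,7 +211,7 @@ class RoleZero(Role):
 
         Returns:
             A tuple containing:
-                - A `UserMessage` object or dict representing the commands.
+                - The parsed commands as a list of dicts on success; on failure, the error message that `_act` returns as-is.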
                 - A boolean flag indicating success (True) or failure (False).
         """
         try:
diff --git a/metagpt/roles/di/swe.py b/metagpt/roles/di/swe.py
index 1fd1324cf..6d357c02b 100644
--- a/metagpt/roles/di/swe.py
+++ b/metagpt/roles/di/swe.py
@@ -22,6 +22,7 @@ class SWE(RoleZero):
     _system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE
     system_msg: list[str] = [_system_msg.format(WINDOW=_bash_window_size)]
     _instruction: str = NEXT_STEP_TEMPLATE
+    # tools: list[str] = ["Bash", "Browser"]
     tools: list[str] = ["Bash"]
     terminal: Bash = Field(default_factory=Bash, exclude=True)
     output_diff: str = ""
@@ -35,11 +36,23 @@ class SWE(RoleZero):
         return res
 
     def _set_system_msg(self):
+        """
+        Sets the system message for the SWE agent.
+
+        Sets the `_bash_window_size` from the environment variable `WINDOW` if it exists.
+        Formats the `_system_msg` template with the current `_bash_window_size`.
+        """
         if os.getenv("WINDOW"):
             self._bash_window_size = int(os.getenv("WINDOW"))
         self.system_msg = [self._system_msg.format(WINDOW=self._bash_window_size)]
 
     def _format_instruction(self):
+        """
+        Formats the instruction message for the SWE agent.
+
+        Runs the "state" command in the terminal, parses its output as JSON,
+        and uses it to format the `_instruction` template.
+        """
         state_output = self.terminal.run("state")
         bash_state = json.loads(state_output)
 
@@ -50,7 +63,15 @@ class SWE(RoleZero):
         return self.instruction
 
     async def _handle_action(self):
-        commands, ok = await self._get_commands()
+        """
+        Handles actions based on parsed commands.
+
+        Parses commands, checks for a "submit" action, and generates a patch using `git diff`.
+        Stores the cleaned patch in `output_diff`. Logs any exceptions.
+
+        This function is specifically added for SWE bench evaluation.
+        """
+        commands, ok = await self._parse_commands()
         if not ok:
             return
         for cmd in commands:
diff --git a/metagpt/tools/swe_agent_commands/__init__.py b/metagpt/tools/swe_agent_commands/__init__.py
new file mode 100644
index 000000000..42e92a12d
--- /dev/null
+++ b/metagpt/tools/swe_agent_commands/__init__.py
@@ -0,0 +1,7 @@
+"""
+This tool was originally developed by the team behind the princeton-nlp/SWE-agent repository.
+You can find the original repository here:
+https://github.com/princeton-nlp/SWE-agent/tree/main/config/commands
+We are using a modified version from OpenDevin:
+https://github.com/OpenDevin/OpenDevin/tree/main/opendevin/runtime/plugins/swe_agent_commands
+"""
diff --git a/metagpt/tools/swe_agent_commands/defaults.sh b/metagpt/tools/swe_agent_commands/defaults.sh
index 880920711..f0898aabc 100644
--- a/metagpt/tools/swe_agent_commands/defaults.sh
+++ b/metagpt/tools/swe_agent_commands/defaults.sh
@@ -177,7 +177,7 @@ create() {
 
 # @yaml
 # signature: submit
-# docstring: submits your current code and terminates the session
+# docstring: submits your current code and terminates the session. This is the only submit action needed; no need to run git add or git commit before this.
 submit() {
     # Check if the patch file exists and is non-empty
     if [ -s "$SWE_CMD_WORK_DIR/test.patch" ]; then
diff --git a/metagpt/tools/swe_agent_commands/execute_env_utils.py b/metagpt/tools/swe_agent_commands/execute_env_utils.py
deleted file mode 100644
index 8fbc616fa..000000000
--- a/metagpt/tools/swe_agent_commands/execute_env_utils.py
+++ /dev/null
@@ -1,359 +0,0 @@
-import json
-import os
-import re
-import select
-import shlex
-import subprocess
-import tarfile
-import tempfile
-import threading
-import time
-import traceback
-from io import BytesIO
-from subprocess import PIPE, STDOUT
-from typing import Tuple
-
-import docker
-from datasets import load_dataset, load_from_disk
-from ghapi.all import GhApi
-
-from metagpt.logs import logger
-
-LOGGER_NAME = "intercode"
-START_UP_DELAY = 5
-TIMEOUT_DURATION = 25
-GITHUB_ISSUE_URL_PATTERN = re.compile(r"github\.com\/(.*?)\/(.*?)\/issues\/(\d+)")
-
-
-def is_from_github_url(data_path: str):
-    return GITHUB_ISSUE_URL_PATTERN.search(data_path) is not None
-
-
-def copy_file_to_container(container, contents, container_path):
-    """
-    Copies a given string into a Docker container at a specified path.
-
-    Args:
-    - container: Docker SDK container object.
-    - contents: The string to copy into the container.
-    - container_path: The path inside the container where the string should be copied to.
-
-    Returns:
-    - None
-    """
-    temp_file_name = None
-
-    try:
-        # Create a temporary file
-        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-            temp_file_name = temp_file.name
-            # Write the string to the temporary file and ensure it's written to disk
-            temp_file.write(contents.encode("utf-8"))
-            temp_file.flush()
-            os.fsync(temp_file.fileno())
-
-        # Create a TAR archive in memory containing the temporary file
-        with tempfile.NamedTemporaryFile():
-            with open(temp_file_name, "rb") as temp_file:
-                # Prepare the TAR archive
-                with BytesIO() as tar_stream:
-                    with tarfile.open(fileobj=tar_stream, mode="w") as tar:
-                        tar_info = tarfile.TarInfo(name=os.path.basename(container_path))
-                        tar_info.size = os.path.getsize(temp_file_name)
-                        tar.addfile(tarinfo=tar_info, fileobj=temp_file)
-                    tar_stream.seek(0)
-                    # Copy the TAR stream to the container
-                    container.put_archive(path=os.path.dirname(container_path), data=tar_stream.read())
-
-    except Exception as e:
-        logger.error(f"An error occurred: {e}")
-        logger.error(traceback.format_exc())
-    finally:
-        # Cleanup: Remove the temporary file if it was created
-        if temp_file_name and os.path.exists(temp_file_name):
-            os.remove(temp_file_name)
-
-
-def read_with_timeout(container, pid_func, timeout_duration):
-    """
-    Read data from a subprocess with a timeout.
-    This function uses a file descriptor to read data from the subprocess in a non-blocking way.
-
-    Args:
-        container (subprocess.Popen): The subprocess container.
-        pid_func (function): A function that returns a list of process IDs (except the PID of the main process).
-        timeout_duration (int): The timeout duration in seconds.
-
-    Returns:
-        str: The data read from the subprocess, stripped of trailing newline characters.
-
-    Raises:
-        TimeoutError: If the timeout duration is reached while reading from the subprocess.
-    """
-    buffer = b""
-    fd = container.stdout.fileno()
-    end_time = time.time() + timeout_duration
-
-    while time.time() < end_time:
-        pids = pid_func()
-        if len(pids) > 0:
-            # There are still PIDs running
-            time.sleep(0.05)
-            continue
-        ready_to_read, _, _ = select.select([fd], [], [], 0.1)
-        if ready_to_read:
-            data = os.read(fd, 4096)
-            if data:
-                buffer += data
-        else:
-            # No more data to read
-            break
-        time.sleep(0.05)  # Prevents CPU hogging
-
-    if container.poll() is not None:
-        raise RuntimeError("Subprocess exited unexpectedly.\nCurrent buffer: {}".format(buffer.decode()))
-    if time.time() >= end_time:
-        raise TimeoutError(
-            "Timeout reached while reading from subprocess.\nCurrent buffer: {}\nRunning PIDs: {}".format(
-                buffer.decode(), pids
-            )
-        )
-    return buffer.decode()
-
-
-class timeout:
-    def __init__(self, seconds=TIMEOUT_DURATION, error_message="Timeout"):
-        self.seconds = seconds
-        self.error_message = error_message
-        self.timer = None
-        self.timeout_occurred = False
-
-    def handle_timeout(self, signum=None, frame=None):
-        self.timeout_occurred = True
-
-    def __enter__(self):
-        # signal.signal(signal.SIGALRM, self.handle_timeout)
-        # signal.alarm(self.seconds)
-        self.timer = threading.Timer(self.seconds, self.handle_timeout)
-        self.timer.start()
-        return self
-
-    def __exit__(self, type, value, traceback):
-        self.timer.cancel()
-        if self.timeout_occurred:
-            print(self.error_message)  # 处理超时的逻辑
-
-
-def get_background_pids(container_obj):
-    pids = container_obj.exec_run("ps -eo pid,comm --no-headers").output.decode().split("\n")
-    pids = [x.split() for x in pids if x]
-    pids = [x for x in pids if x[1] not in {"ps"} and x[0] != "1"]
-    bash_pids = [x for x in pids if x[1] == "bash"]
-    other_pids = [x for x in pids if x[1] not in {"bash"}]
-    return bash_pids, other_pids
-
-
-def _get_non_persistent_container(ctr_name: str, image_name: str) -> Tuple[subprocess.Popen, set]:
-    startup_cmd = [
-        "docker",
-        "run",
-        "-i",
-        "--rm",
-        "--name",
-        ctr_name,
-        image_name,
-        "/bin/bash",
-        "-l",
-        "-m",
-    ]
-
-    logger.debug("Starting container with command: %s", shlex.join(startup_cmd))
-    container = subprocess.Popen(
-        startup_cmd,
-        stdin=PIPE,
-        stdout=PIPE,
-        stderr=STDOUT,
-        text=True,
-        bufsize=1,  # line buffered
-    )
-    time.sleep(START_UP_DELAY)
-    # try to read output from container setup (usually an error), timeout if no output
-    try:
-        with timeout(seconds=2):
-            output = container.stdout.read()
-            if output:
-                logger.error(f"Unexpected container setup output: {output}")
-    except TimeoutError:
-        pass
-    return container, {
-        "1",
-    }  # bash PID is always 1 for non-persistent containers
-
-
-def _get_persistent_container(ctr_name: str, image_name: str, persistent: bool = False) -> Tuple[subprocess.Popen, set]:
-    client = docker.from_env()
-    containers = client.containers.list(all=True, filters={"name": ctr_name})
-    if ctr_name in [c.name for c in containers]:
-        container_obj = client.containers.get(ctr_name)
-        if container_obj.status in {"created"}:
-            container_obj.start()
-        elif container_obj.status in {"running"}:
-            pass
-        elif container_obj.status in {"exited"}:
-            container_obj.restart()
-        elif container_obj.status in {"paused"}:
-            container_obj.unpause()
-        else:
-            raise RuntimeError(f"Unexpected container status: {container_obj.status}")
-    else:
-        container_obj = client.containers.run(
-            image_name,
-            command="/bin/bash -l -m",
-            name=ctr_name,
-            stdin_open=True,
-            tty=True,
-            detach=True,
-            auto_remove=not persistent,
-        )
-        container_obj.start()
-    startup_cmd = [
-        "docker",
-        "exec",
-        "-i",
-        ctr_name,
-        "/bin/bash",
-        "-l",
-        "-m",
-    ]
-    logger.debug("Starting container with command: %s", shlex.join(startup_cmd))
-    container = subprocess.Popen(
-        startup_cmd,
-        stdin=PIPE,
-        stdout=PIPE,
-        stderr=STDOUT,
-        text=True,
-        bufsize=1,  # line buffered
-    )
-    time.sleep(START_UP_DELAY)
-    # try to read output from container setup (usually an error), timeout if no output
-    try:
-        with timeout(seconds=2):
-            output = container.stdout.read()
-            if output:
-                logger.error(f"Unexpected container setup output: {output}")
-    except TimeoutError:
-        pass
-    # Get the process IDs of the container
-    # There should be at least a head process and possibly one child bash process
-    bash_pids, other_pids = get_background_pids(container_obj)
-    bash_pid = 1
-    if len(bash_pids) == 1:
-        bash_pid = bash_pids[0][0]
-    elif len(bash_pids) > 1 or len(other_pids) > 0:
-        raise RuntimeError(
-            f"Detected alien processes attached or running. Please ensure that no other agents are running on this container. PIDs: {bash_pids}, {other_pids}"
-        )
-    return container, set(
-        map(
-            str,
-            [
-                bash_pid,
-                1,
-            ],
-        )
-    )
-
-
-def get_container(ctr_name: str, image_name: str, persistent: bool = False) -> subprocess.Popen:
-    """
-    Get a container object for a given container name and image name
-
-    Arguments:
-        ctr_name (str): Name of container
-        image_name (str): Name of image
-        persistent (bool): Whether to use a persistent container or not
-    Returns:
-        Container object
-    """
-    if persistent:
-        return _get_persistent_container(ctr_name, image_name)
-    else:
-        return _get_non_persistent_container(ctr_name, image_name)
-
-
-def get_commit(api: GhApi, owner: str, repo: str, base_commit: str = None):
-    if base_commit:
-        commit = api.repos.get_commit(owner, repo, base_commit)
-    else:
-        commit = api.repos.list_commits(owner, repo)[0]
-    return commit
-
-
-class InvalidGithubURL(ValueError):
-    ...
-
-
-def parse_gh_issue_url(issue_url: str) -> Tuple[str, str, str]:
-    """Return owner, repo, issue number from issue url"""
-    match = GITHUB_ISSUE_URL_PATTERN.search(issue_url)
-    if not match:
-        raise InvalidGithubURL(f"Invalid GitHub issue URL: {issue_url}")
-    res = match.groups()
-    assert len(res) == 3
-    return tuple(res)  # type: ignore
-
-
-def get_instances(file_path: str, base_commit: str = None, split: str = None, token: str = None):
-    """
-    Getter function for handling json, jsonl files
-
-    Arguments:
-        file_path (str): Path to file
-    Returns:
-        List of instances
-    """
-    # If file_path is a directory, attempt load from disk
-    if os.path.isdir(file_path):
-        dataset_or_dict = load_from_disk(file_path)
-        if isinstance(dataset_or_dict, dict):
-            return dataset_or_dict[split]
-        return dataset_or_dict
-
-    # If file_path is a github issue url, fetch the issue and return a single instance
-    if is_from_github_url(file_path):
-        try:
-            owner, repo, issue_number = parse_gh_issue_url(file_path)
-        except InvalidGithubURL:
-            pass
-        else:
-            record = dict()
-            api = GhApi(token=token)
-            issue = api.issues.get(owner, repo, issue_number)
-            title = issue.title if issue.title else ""
-            body = issue.body if issue.body else ""
-            text = f"{title}\n{body}\n"
-            record["repo"] = f"{owner}/{repo}"
-            record["base_commit"] = base_commit if base_commit else get_commit(api, owner, repo, base_commit).sha
-            record["version"] = record["base_commit"][:7]
-            record["problem_statement"] = text
-            record["instance_id"] = f"{owner}__{repo}-i{issue_number}"
-            return [
-                record,
-            ]
-    elif base_commit is not None:
-        raise ValueError("base_commit must be None if data_path is not a github issue url")
-
-    # If file_path is a file, load the file
-    if file_path.endswith(".json"):
-        return json.load(open(file_path))
-    if file_path.endswith(".jsonl"):
-        return [json.loads(x) for x in open(file_path, "r").readlines()]
-
-    # Attempt load from HF datasets as a last resort
-    try:
-        return load_dataset(file_path, split=split)
-    except:
-        raise ValueError(
-            f"Could not load instances from {file_path}. "
-            "Please ensure --data_path is a GitHub URL, a SWE-bench HuggingFace dataset, or a JSON/JSONL file."
-        )
diff --git a/tests/metagpt/roles/di/run_swe.py b/tests/metagpt/roles/di/run_swe.py
index 42694020f..c6cc56fd1 100644
--- a/tests/metagpt/roles/di/run_swe.py
+++ b/tests/metagpt/roles/di/run_swe.py
@@ -7,6 +7,7 @@ from metagpt.config2 import config
 from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT
 from metagpt.logs import logger
 from metagpt.roles.di.swe import SWE
+from metagpt.tools.libs.terminal import Terminal
 from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset
 
 # Specify by yourself
@@ -25,8 +26,9 @@ We're currently solving the following issue within our repository. You can use a
 hints text is the comment under issue:
 {hints_text}
 
-The repo may already exist at the path `{repo_path}` (if not, please download the repo to this path). 
-This issue occurred in version {version}, with the corresponding base commit being {base_commit}, you need to switch to the code version corresponding to this commit.
+The repository may already exist at the path `{repo_path}`. If it doesn't, please download the repository to this path.
+All your subsequent actions should use the project path as your root directory, and you should never leave that directory to execute any actions.
+This issue occurred in version {version}, with the corresponding base commit being {base_commit}. You need to switch to the code version associated with this commit.
 
 # INSTRUCTIONS:
 Now, you're going to solve this issue on your own from the perspective of a programmer. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need. 
@@ -86,7 +88,11 @@ async def run(instance, swe_result_dir):
         return
 
     repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"])
-
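+    # The agent is instructed to use the project path as its root directory and never leave it.
+    # Before it starts, reset that checkout: `git checkout .` discards tracked modifications,
+    # and `git clean` previews (-n) then removes (-f) untracked files and directories (-d).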
+    terminal = Terminal()
+    terminal.run_command(f"cd {repo_path} &&     git checkout . && git clean -n -d && git clean -f -d")
     user_requirement_and_issue = INSTANCE_TEMPLATE.format(
         issue=instance["problem_statement"],
         hints_text=instance["hints_text"],
@@ -122,10 +128,10 @@ async def async_main():
     dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test")
     sample_datasets = split_dataset_equally(dataset)
     date_time = datetime.now().strftime("%m-%d")
-    round_ = "first"
+    round_ = "third"
 
     for idx, sub_dataset in enumerate(sample_datasets):
-        exp_name = f"Nano-test-{date_time}-{round_}-part-{idx}"
+        exp_name = f"nano_mgx_{date_time}_{round_}_part_{idx}"
         swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model}" / exp_name
         swe_result_dir.mkdir(parents=True, exist_ok=True)
         for instance in sub_dataset: