add: SWE Agent

This commit is contained in:
seeker 2024-06-28 21:05:46 +08:00
parent c0c2b5b218
commit 9b11ac5c34
17 changed files with 1575 additions and 21 deletions

View file

@ -2,6 +2,7 @@ import subprocess
import threading
from queue import Queue
from metagpt.const import SWE_SETUP_PATH
from metagpt.tools.tool_registry import register_tool
from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter
@ -26,7 +27,7 @@ class Terminal:
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
executable="/bin/bash"
executable="/bin/bash",
)
self.stdout_queue = Queue()
self.observer = TerminalReporter()
@ -129,3 +130,95 @@ class Terminal:
self.process.stdin.close()
self.process.terminate()
self.process.wait()
@register_tool(include_functions=["run"])
class Bash(Terminal):
    """
    A class to run bash commands directly and provides custom shell functions.
    """

    def __init__(self):
        """init"""
        super().__init__()
        # Source the SWE-agent helper functions (open/goto/edit/search, ...)
        # into the persistent shell so that `run` can invoke them later.
        setup_cmd = f"source {SWE_SETUP_PATH}"
        self.run_command(setup_cmd)

    def run(self, cmd: str) -> str:
        """
        Executes a bash command.

        Args:
            cmd (str): The bash command to execute.

        Returns:
            str: The output of the command.

        This method allows for executing standard bash commands as well as
        utilizing several custom shell functions defined in the environment.

        Custom Shell Functions:
        - open <path> [<line_number>]
            Opens the file at the given path in the editor. If line_number is provided,
            the window will move to include that line.
            Arguments:
                path (str): The path to the file to open.
                line_number (int, optional): The line number to move the window to.
                    If not provided, the window will start at the top of the file.

        - goto <line_number>
            Moves the window to show <line_number>.
            Arguments:
                line_number (int): The line number to move the window to.

        - scroll_down
            Moves the window down {WINDOW} lines.

        - scroll_up
            Moves the window up {WINDOW} lines.

        - create <filename>
            Creates and opens a new file with the given name.
            Arguments:
                filename (str): The name of the file to create.

        - submit
            Submits your current code and terminates the session.

        - search_dir_and_preview <search_term> [<dir>]
            Searches for search_term in all files in dir and gives their code preview
            with line numbers. If dir is not provided, searches in the current directory.
            Arguments:
                search_term (str): The term to search for.
                dir (str, optional): The directory to search in. Defaults to the current directory.

        - search_file <search_term> [<file>]
            Searches for search_term in file. If file is not provided, searches in the current open file.
            Arguments:
                search_term (str): The term to search for.
                file (str, optional): The file to search in. Defaults to the current open file.

        - find_file <file_name> [<dir>]
            Finds all files with the given name in dir. If dir is not provided, searches in the current directory.
            Arguments:
                file_name (str): The name of the file to search for.
                dir (str, optional): The directory to search in. Defaults to the current directory.

        - edit <start_line>:<end_line> <<EOF
          <replacement_text>
          EOF
            Line numbers start from 1. Replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file.
            The replacement text is terminated by a line with only EOF on it. All of the <replacement text> will be entered, so make
            sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system
            detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error
            message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same
            error message again. All code modifications made via the 'edit' command must strictly follow the PEP8 standard.
            Arguments:
                start_line (int): The line number to start the edit at, starting from 1.
                end_line (int): The line number to end the edit at (inclusive), starting from 1.
                replacement_text (str): The text to replace the current selection with, must conform to PEP8 standards.

        Note: Make sure to use these functions as per their defined arguments and behaviors.
        """
        # NOTE(review): this docstring doubles as the tool description exposed by
        # register_tool, so its wording is user-facing — confirm before editing it.
        return self.run_command(cmd)

View file

@ -0,0 +1,20 @@
# _setup_default_env.sh
# Default Mode from SWE-Bench
# https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml
# Shared viewer state consumed by the sourced command scripts
# (open/goto/scroll/edit/search). WINDOW is the page size; OVERLAP is the
# number of context lines kept visible when scrolling.
export WINDOW=100
export OVERLAP=2
# Cursor state of the file viewer: the centered line and the open file path.
export CURRENT_LINE=0
export CURRENT_FILE=''
# NOTE(review): bash does not export arrays to child processes; these work only
# because the command scripts are sourced into this same shell — confirm.
export SEARCH_RESULTS=()
export SEARCH_FILES=()
export SEARCH_INDEX=0

# Print the viewer state as a one-line JSON object for the agent harness.
state() {
    local working_dir="$PWD"
    if [ ! -e "$CURRENT_FILE" ]; then
        echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}'
    else
        # NOTE(review): paths are interpolated into the JSON without escaping;
        # quotes or special characters in a path would break it — confirm OK.
        echo '{"open_file": "'$(realpath "$CURRENT_FILE")'", "working_dir": "'$working_dir'"}'
    fi
}

View file

@ -0,0 +1,19 @@
#!/usr/bin/env python3
from __future__ import annotations
import sys
def print_flake8_output(input_string, show_line_numbers=False):
    """Reformat raw flake8 output as one "- ..." bullet per finding.

    Args:
        input_string: Raw flake8 output, one finding per line, e.g.
            "path.py:3:1: F821 undefined name 'x'".
        show_line_numbers: When True, keep the "line:col:" part of the
            location prefix in each bullet; otherwise drop the location.

    Fix: blank lines (e.g. when flake8 produced no output at all) are now
    skipped instead of printing a spurious bare "- " bullet.
    """
    for value in input_string.split("\n"):
        parts = value.split()
        if not parts:
            # Blank line: nothing to report.
            continue
        if not show_line_numbers:
            print(f"- {' '.join(parts[1:])}")
        else:
            # parts[0] is "path:line:col:"; keep everything after the path.
            line_nums = ":".join(parts[0].split(":")[1:])
            print(f"- {line_nums} {' '.join(parts[1:])}")
if __name__ == "__main__":
    # First CLI argument is the raw flake8 output to reformat.
    # NOTE(review): raises IndexError if invoked without an argument — confirm
    # all callers (the edit shell function) always pass one.
    lint_output = sys.argv[1]
    print_flake8_output(lint_output)

View file

@ -0,0 +1,19 @@
#!/usr/bin/env python3
from __future__ import annotations
import sys
def print_flake8_output(input_string, show_line_numbers=False):
    """Reformat raw flake8 output as one "- ..." bullet per finding.

    Args:
        input_string: Raw flake8 output, one finding per line, e.g.
            "path.py:3:1: F821 undefined name 'x'".
        show_line_numbers: When True, keep the "line:col:" part of the
            location prefix in each bullet; otherwise drop the location.

    Fix: blank lines (e.g. when flake8 produced no output at all) are now
    skipped instead of printing a spurious bare "- " bullet.
    """
    for value in input_string.split("\n"):
        parts = value.split()
        if not parts:
            # Blank line: nothing to report.
            continue
        if not show_line_numbers:
            print(f"- {' '.join(parts[1:])}")
        else:
            # parts[0] is "path:line:col:"; keep everything after the path.
            line_nums = ":".join(parts[0].split(":")[1:])
            print(f"- {line_nums} {' '.join(parts[1:])}")
if __name__ == "__main__":
    # First CLI argument is the raw flake8 output to reformat.
    # NOTE(review): raises IndexError if invoked without an argument — confirm
    # all callers (the edit shell function) always pass one.
    lint_output = sys.argv[1]
    print_flake8_output(lint_output)

View file

@ -0,0 +1,193 @@
# Print the current viewer window of $CURRENT_FILE: a header with path and
# total line count, up to $WINDOW numbered lines around $CURRENT_LINE, and
# "more lines above/below" markers when the view is truncated.
_print() {
    local total_lines=$(awk 'END {print NR}' $CURRENT_FILE)
    echo "[File: $(realpath $CURRENT_FILE) ($total_lines lines total)]"
    # jq is used here purely as a calculator for floor/round arithmetic.
    lines_above=$(jq -n "$CURRENT_LINE - $WINDOW/2" | jq '[0, .] | max | floor')
    lines_below=$(jq -n "$total_lines - $CURRENT_LINE - $WINDOW/2" | jq '[0, .] | max | round')
    if [ $lines_above -gt 0 ]; then
        echo "($lines_above more lines above)"
    fi
    # `grep -n $` numbers every line; head/tail carve out the window.
    # NOTE(review): $CURRENT_FILE is unquoted — paths containing spaces would
    # break this pipeline; confirm paths are space-free in practice.
    cat $CURRENT_FILE | grep -n $ | head -n $(jq -n "[$CURRENT_LINE + $WINDOW/2, $WINDOW/2] | max | floor") | tail -n $(jq -n "$WINDOW")
    if [ $lines_below -gt 0 ]; then
        echo "($lines_below more lines below)"
    fi
}
# Clamp $CURRENT_LINE so the window stays inside the open file: no closer than
# half a window to the last line (first clamp), and at least half a window
# below the top (second clamp).
_constrain_line() {
    if [ -z "$CURRENT_FILE" ]
    then
        echo "No file open. Use the open command first."
        return
    fi
    local max_line=$(awk 'END {print NR}' $CURRENT_FILE)
    local half_window=$(jq -n "$WINDOW/2" | jq 'floor')
    # Clamp from above first, then from below.
    export CURRENT_LINE=$(jq -n "[$CURRENT_LINE, $max_line - $half_window] | min")
    export CURRENT_LINE=$(jq -n "[$CURRENT_LINE, $half_window] | max")
}
# @yaml
# signature: open <path> [<line_number>]
# docstring: opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line
# arguments:
#   path:
#     type: string
#     description: the path to the file to open
#     required: true
#   line_number:
#     type: integer
#     description: the line number to move the window to (if not provided, the window will start at the top of the file)
#     required: false
open() {
    if [ -z "$1" ]
    then
        echo "Usage: open <file>"
        return
    fi
    # Check if the second argument is provided
    if [ -n "$2" ]; then
        # Check if the provided argument is a valid number
        if ! [[ $2 =~ ^[0-9]+$ ]]; then
            echo "Usage: open <file> [<line_number>]"
            echo "Error: <line_number> must be a number"
            return # Exit if the line number is not valid
        fi
        local max_line=$(awk 'END {print NR}' $1)
        if [ $2 -gt $max_line ]; then
            echo "Warning: <line_number> ($2) is greater than the number of lines in the file ($max_line)"
            echo "Warning: Setting <line_number> to $max_line"
            local line_number=$(jq -n "$max_line") # Set line number to max if greater than max
        elif [ $2 -lt 1 ]; then
            echo "Warning: <line_number> ($2) is less than 1"
            echo "Warning: Setting <line_number> to 1"
            local line_number=$(jq -n "1") # Set line number to 1 if less than 1
        else
            # Position the window so the requested line sits above center
            # (WINDOW/6 offset), leaving more context below it.
            local OFFSET=$(jq -n "$WINDOW/6" | jq 'floor')
            local line_number=$(jq -n "[$2 + $WINDOW/2 - $OFFSET, 1] | max | floor")
        fi
    else
        local line_number=$(jq -n "$WINDOW/2") # Set default line number if not provided
    fi
    if [ -f "$1" ]; then
        export CURRENT_FILE=$(realpath $1)
        export CURRENT_LINE=$line_number
        _constrain_line
        _print
    elif [ -d "$1" ]; then
        echo "Error: $1 is a directory. You can only open files. Use cd or ls to navigate directories."
    else
        echo "File $1 not found"
    fi
}
# @yaml
# signature: goto <line_number>
# docstring: moves the window to show <line_number>
# arguments:
#   line_number:
#     type: integer
#     description: the line number to move the window to
#     required: true
goto() {
    if [ $# -gt 1 ]; then
        echo "goto allows only one line number at a time."
        return
    fi
    if [ -z "$CURRENT_FILE" ]
    then
        echo "No file open. Use the open command first."
        return
    fi
    if [ -z "$1" ]
    then
        echo "Usage: goto <line>"
        return
    fi
    if ! [[ $1 =~ ^[0-9]+$ ]]
    then
        echo "Usage: goto <line>"
        echo "Error: <line> must be a number"
        return
    fi
    local max_line=$(awk 'END {print NR}' $CURRENT_FILE)
    if [ $1 -gt $max_line ]
    then
        echo "Error: <line> must be less than or equal to $max_line"
        return
    fi
    # Same placement policy as `open`: target line sits above window center.
    local OFFSET=$(jq -n "$WINDOW/6" | jq 'floor')
    export CURRENT_LINE=$(jq -n "[$1 + $WINDOW/2 - $OFFSET, 1] | max | floor")
    _constrain_line
    _print
}
# @yaml
# signature: scroll_down
# docstring: moves the window down {WINDOW} lines
scroll_down() {
    if [ -z "$CURRENT_FILE" ]
    then
        echo "No file open. Use the open command first."
        return
    fi
    # Advance one full window minus OVERLAP lines of shared context.
    export CURRENT_LINE=$(jq -n "$CURRENT_LINE + $WINDOW - $OVERLAP")
    _constrain_line
    _print
}
# @yaml
# signature: scroll_up
# docstring: moves the window up {WINDOW} lines
scroll_up() {
    if [ -z "$CURRENT_FILE" ]
    then
        echo "No file open. Use the open command first."
        return
    fi
    # Move back one full window minus OVERLAP lines of shared context.
    export CURRENT_LINE=$(jq -n "$CURRENT_LINE - $WINDOW + $OVERLAP")
    _constrain_line
    _print
}
# @yaml
# signature: create <filename>
# docstring: creates and opens a new file with the given name
# arguments:
#   filename:
#     type: string
#     description: the name of the file to create
#     required: true
# Create a new file seeded with a single empty line and show it in the viewer.
# Existing files are never clobbered — they are simply opened instead.
create() {
    local target="$1"
    if [ -z "$target" ]; then
        echo "Usage: create <filename>"
        return
    fi
    if [ -e "$target" ]; then
        echo "Error: File '$target' already exists."
        open "$target"
        return
    fi
    # Seed with one empty line, then reuse `open` to display it.
    printf "\n" > "$target"
    open "$target"
}
# @yaml
# signature: submit
# docstring: submits your current code and terminates the session
submit() {
    # Check if the patch file exists and is non-empty
    if [ -s "$SWE_CMD_WORK_DIR/test.patch" ]; then
        # Apply the patch in reverse
        # (removes the benchmark's test patch so it is not part of the submission)
        git apply -R < "$SWE_CMD_WORK_DIR/test.patch"
    fi
    git add -A
    git diff --cached > model.patch
    # The <<SUBMISSION|| ... ||SUBMISSION>> markers let the harness extract the patch.
    echo "<<SUBMISSION||"
    cat model.patch
    echo "||SUBMISSION>>"
}

View file

@ -0,0 +1,165 @@
# @yaml
# signature: |-
#   edit <start_line>:<end_line> <<EOF
#   <replacement_text>
#   EOF
# docstring: Line numbers start from 1. Replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only EOF on it. All of the <replacement text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. All code modifications made via the 'edit' command must strictly follow the PEP8 standard.
# end_name: EOF
# arguments:
#   start_line:
#     type: integer
#     description: the line number to start the edit at, start from 1.
#     required: true
#   end_line:
#     type: integer
#     description: the line number to end the edit at (inclusive), start from 1.
#     required: true
#   replacement_text:
#     type: string
#     description: the text to replace the current selection with must conform to PEP8 standards.
#     required: true
edit() {
    if [ -z "$CURRENT_FILE" ]
    then
        echo 'No file open. Use the `open` command first.'
        return
    fi
    # Parse "<start_line>:<end_line>"; the appended ':' keeps cut from failing
    # when a field is missing.
    local start_line="$(echo $1: | cut -d: -f1)"
    local end_line="$(echo $1: | cut -d: -f2)"
    if [ -z "$start_line" ] || [ -z "$end_line" ]
    then
        echo "Usage: edit <start_line>:<end_line>"
        return
    fi
    local re='^[0-9]+$'
    if ! [[ $start_line =~ $re ]]; then
        echo "Usage: edit <start_line>:<end_line>"
        echo "Error: start_line must be a number"
        return
    fi
    if ! [[ $end_line =~ $re ]]; then
        echo "Usage: edit <start_line>:<end_line>"
        echo "Error: end_line must be a number"
        return
    fi
    # Run linter for original file
    # (pre-existing errors are later ignored; only NEW errors block the edit)
    if [[ $CURRENT_FILE == *.py ]]; then
        original_lint_output=$(flake8 --isolated --select=F821,F822,F831,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
    else
        # do nothing
        original_lint_output=""
    fi
    # Bash array starts at 0, so let's adjust
    local start_line=$((start_line - 1))
    local end_line=$((end_line))
    local line_count=0
    local replacement=()
    # The replacement text arrives on stdin (terminated by the heredoc EOF).
    while IFS= read -r line
    do
        replacement+=("$line")
        ((line_count++))
    done
    # Create a backup of the current file
    cp "$CURRENT_FILE" "$SWE_CMD_WORK_DIR/$(basename "$CURRENT_FILE")_backup"
    # Read the file line by line into an array
    mapfile -t lines < "$CURRENT_FILE"
    local new_lines=("${lines[@]:0:$start_line}" "${replacement[@]}" "${lines[@]:$((end_line))}")
    # Write the new stuff directly back into the original file
    printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE"
    # Run linter
    if [[ $CURRENT_FILE == *.py ]]; then
        lint_output=$(flake8 --isolated --select=F821,F822,F831,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
    else
        # do nothing
        lint_output=""
    fi
    # Create temporary files
    temp_original=$(mktemp)
    temp_modified=$(mktemp)
    # Remove line numbers and save cleaned outputs to temporary files
    # (normalizing :line:col: lets identical errors match even if they moved)
    echo "$original_lint_output" | sed 's/:[0-9]\+:[0-9]\+:/:LINE:COL:/g' > "$temp_original"
    echo "$lint_output" | sed 's/:[0-9]\+:[0-9]\+:/:LINE:COL:/g' > "$temp_modified"
    # Compare the temporary files
    if cmp -s "$temp_original" "$temp_modified"; then
        lint_output=""
    else
        echo "Linter output for the original file:"
        cat "$temp_original"
        # print linter result
        echo "Linter output for the modified file:"
        cat "$temp_modified"
    fi
    # Clean up temporary files
    rm "$temp_original" "$temp_modified"
    # if there is no output, then the file is good
    if [ -z "$lint_output" ]; then
        export CURRENT_LINE=$start_line
        _constrain_line
        _print
        echo "File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary."
    else
        # New lint errors: show the rejected result, then roll back from backup.
        echo "Your proposed edit has introduced new syntax error(s). Please understand the fixes and retry your edit command."
        echo ""
        echo "ERRORS:"
        _split_string "$lint_output"
        echo ""
        # Save original values
        original_current_line=$CURRENT_LINE
        original_window=$WINDOW
        # Update values
        export CURRENT_LINE=$(( (line_count / 2) + start_line )) # Set to "center" of edit
        export WINDOW=$((line_count + 10)) # Show +/- 5 lines around edit
        echo "This is how your edit would have looked if applied"
        echo "-------------------------------------------------"
        _constrain_line
        _print
        echo "-------------------------------------------------"
        echo ""
        # Restoring CURRENT_FILE to original contents.
        cp "$SWE_CMD_WORK_DIR/$(basename "$CURRENT_FILE")_backup" "$CURRENT_FILE"
        export CURRENT_LINE=$(( ((end_line - start_line + 1) / 2) + start_line ))
        export WINDOW=$((end_line - start_line + 10))
        echo "This is the original code before your edit"
        echo "-------------------------------------------------"
        _constrain_line
        _print
        echo "-------------------------------------------------"
        #
        # Restore original values
        export CURRENT_LINE=$original_current_line
        export WINDOW=$original_window
        echo "Your changes have NOT been applied. Please fix your edit command and try again."
        echo "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code."
        echo "DO NOT re-run the same failed edit command. Running it again will lead to the same error."
    fi
    # Remove backup file
    rm -f "$SWE_CMD_WORK_DIR/$(basename "$CURRENT_FILE")_backup"
}

View file

@ -0,0 +1,359 @@
import json
import os
import re
import select
import shlex
import subprocess
import tarfile
import tempfile
import threading
import time
import traceback
from io import BytesIO
from subprocess import PIPE, STDOUT
from typing import Tuple
import docker
from datasets import load_dataset, load_from_disk
from ghapi.all import GhApi
from metagpt.logs import logger
# NOTE(review): LOGGER_NAME appears unused in this module — confirm before removing.
LOGGER_NAME = "intercode"
# Seconds to wait after spawning `docker run`/`docker exec` before reading output.
START_UP_DELAY = 5
# Default budget (seconds) for the soft `timeout` context manager below.
TIMEOUT_DURATION = 25
# Matches "github.com/<owner>/<repo>/issues/<number>" anywhere in a string.
GITHUB_ISSUE_URL_PATTERN = re.compile(r"github\.com\/(.*?)\/(.*?)\/issues\/(\d+)")


def is_from_github_url(data_path: str):
    """Return True when *data_path* contains a GitHub issue URL."""
    return bool(GITHUB_ISSUE_URL_PATTERN.search(data_path))
def copy_file_to_container(container, contents, container_path):
    """
    Copies a given string into a Docker container at a specified path.

    Args:
        container: Docker SDK container object (anything exposing `put_archive`).
        contents: The string to copy into the container.
        container_path: The path inside the container where the string should be copied to.

    Returns:
        None

    Errors are logged (with traceback) rather than raised, so a failed copy is
    best-effort — callers are not interrupted.
    """
    temp_file_name = None
    try:
        # Stage the contents in a temporary file on the host.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file_name = temp_file.name
            temp_file.write(contents.encode("utf-8"))
            temp_file.flush()
            os.fsync(temp_file.fileno())
        # Fix: a useless `with tempfile.NamedTemporaryFile():` wrapper that
        # created and immediately discarded a second temp file was removed.
        with open(temp_file_name, "rb") as src:
            # Build an in-memory TAR archive containing the single file,
            # named after the target's basename.
            with BytesIO() as tar_stream:
                with tarfile.open(fileobj=tar_stream, mode="w") as tar:
                    tar_info = tarfile.TarInfo(name=os.path.basename(container_path))
                    tar_info.size = os.path.getsize(temp_file_name)
                    tar.addfile(tarinfo=tar_info, fileobj=src)
                tar_stream.seek(0)
                # put_archive extracts the TAR into the target directory.
                container.put_archive(path=os.path.dirname(container_path), data=tar_stream.read())
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        logger.error(traceback.format_exc())
    finally:
        # Cleanup: Remove the temporary file if it was created
        if temp_file_name and os.path.exists(temp_file_name):
            os.remove(temp_file_name)
def read_with_timeout(container, pid_func, timeout_duration):
    """
    Read data from a subprocess with a timeout.
    This function uses a file descriptor to read data from the subprocess in a non-blocking way.

    Args:
        container (subprocess.Popen): The subprocess container.
        pid_func (function): A function that returns a list of process IDs (except the PID of the main process).
        timeout_duration (int): The timeout duration in seconds.

    Returns:
        str: The decoded data read from the subprocess.

    Raises:
        RuntimeError: If the subprocess exits while being read.
        TimeoutError: If the timeout duration is reached while reading from the subprocess.
    """
    buffer = b""
    fd = container.stdout.fileno()
    end_time = time.time() + timeout_duration
    while time.time() < end_time:
        # Wait until no foreground PIDs remain before draining stdout, so the
        # output read is complete rather than mid-command.
        pids = pid_func()
        if len(pids) > 0:
            # There are still PIDs running
            time.sleep(0.05)
            continue
        ready_to_read, _, _ = select.select([fd], [], [], 0.1)
        if ready_to_read:
            data = os.read(fd, 4096)
            if data:
                buffer += data
        else:
            # No more data to read
            break
        time.sleep(0.05)  # Prevents CPU hogging
    if container.poll() is not None:
        raise RuntimeError("Subprocess exited unexpectedly.\nCurrent buffer: {}".format(buffer.decode()))
    if time.time() >= end_time:
        # NOTE(review): `pids` is unbound here if the loop body never ran
        # (timeout_duration <= 0) — confirm callers always pass a positive value.
        raise TimeoutError(
            "Timeout reached while reading from subprocess.\nCurrent buffer: {}\nRunning PIDs: {}".format(
                buffer.decode(), pids
            )
        )
    return buffer.decode()
class timeout:
    """Soft-timeout context manager based on threading.Timer.

    NOTE(review): unlike the commented-out SIGALRM approach, the timer only
    sets `timeout_occurred`; the wrapped block still runs to completion and the
    message is merely printed on exit — confirm this soft behavior is intended.
    """

    def __init__(self, seconds=TIMEOUT_DURATION, error_message="Timeout"):
        # seconds: time budget; error_message: printed if the budget is exceeded.
        self.seconds = seconds
        self.error_message = error_message
        self.timer = None
        self.timeout_occurred = False

    def handle_timeout(self, signum=None, frame=None):
        # Timer callback; signature kept signal-handler-compatible
        # (signum/frame are unused when fired from threading.Timer).
        self.timeout_occurred = True

    def __enter__(self):
        # signal.signal(signal.SIGALRM, self.handle_timeout)
        # signal.alarm(self.seconds)
        self.timer = threading.Timer(self.seconds, self.handle_timeout)
        self.timer.start()
        return self

    def __exit__(self, type, value, traceback):
        self.timer.cancel()
        if self.timeout_occurred:
            print(self.error_message)  # logic for handling the timeout (report only)
def get_background_pids(container_obj):
    """Split the container's process list into bash and non-bash PIDs.

    Runs `ps` inside the container, drops the `ps` process itself and PID 1,
    and returns (bash_pids, other_pids) where each entry is [pid, comm].
    """
    raw = container_obj.exec_run("ps -eo pid,comm --no-headers").output.decode()
    entries = [line.split() for line in raw.split("\n") if line]
    # PID 1 is the container's own head process; `ps` is our probe.
    candidates = [entry for entry in entries if entry[1] not in {"ps"} and entry[0] != "1"]
    bash_pids = [entry for entry in candidates if entry[1] == "bash"]
    other_pids = [entry for entry in candidates if entry[1] not in {"bash"}]
    return bash_pids, other_pids
def _get_non_persistent_container(ctr_name: str, image_name: str) -> Tuple[subprocess.Popen, set]:
    """Launch a throwaway container (`docker run --rm`) and attach to its bash.

    Args:
        ctr_name: Name to give the container.
        image_name: Image to run.

    Returns:
        Tuple of (Popen handle whose stdin/stdout talk to the container's
        bash, set of PID strings that belong to the shell itself).
    """
    startup_cmd = [
        "docker",
        "run",
        "-i",
        "--rm",
        "--name",
        ctr_name,
        image_name,
        "/bin/bash",
        "-l",
        "-m",
    ]
    logger.debug("Starting container with command: %s", shlex.join(startup_cmd))
    container = subprocess.Popen(
        startup_cmd,
        stdin=PIPE,
        stdout=PIPE,
        stderr=STDOUT,
        text=True,
        bufsize=1,  # line buffered
    )
    time.sleep(START_UP_DELAY)
    # try to read output from container setup (usually an error), timeout if no output
    # NOTE(review): the soft `timeout` class never raises TimeoutError and
    # `stdout.read()` blocks until EOF — confirm this path works as intended.
    try:
        with timeout(seconds=2):
            output = container.stdout.read()
            if output:
                logger.error(f"Unexpected container setup output: {output}")
    except TimeoutError:
        pass
    return container, {
        "1",
    }  # bash PID is always 1 for non-persistent containers
def _get_persistent_container(ctr_name: str, image_name: str, persistent: bool = False) -> Tuple[subprocess.Popen, set]:
    """Reuse (or create) a named long-lived container and attach via `docker exec`.

    Args:
        ctr_name: Container name to look up or create.
        image_name: Image used when the container does not exist yet.
        persistent: When True, the container is kept after exit (auto_remove disabled).

    Returns:
        Tuple of (Popen handle attached to the container's bash, set of PID
        strings considered part of the shell itself).

    Raises:
        RuntimeError: on an unexpected container status, or when alien
            processes are already running inside the container.
    """
    client = docker.from_env()
    containers = client.containers.list(all=True, filters={"name": ctr_name})
    if ctr_name in [c.name for c in containers]:
        # Container already exists: bring it to the "running" state.
        container_obj = client.containers.get(ctr_name)
        if container_obj.status in {"created"}:
            container_obj.start()
        elif container_obj.status in {"running"}:
            pass
        elif container_obj.status in {"exited"}:
            container_obj.restart()
        elif container_obj.status in {"paused"}:
            container_obj.unpause()
        else:
            raise RuntimeError(f"Unexpected container status: {container_obj.status}")
    else:
        container_obj = client.containers.run(
            image_name,
            command="/bin/bash -l -m",
            name=ctr_name,
            stdin_open=True,
            tty=True,
            detach=True,
            auto_remove=not persistent,
        )
        container_obj.start()
    startup_cmd = [
        "docker",
        "exec",
        "-i",
        ctr_name,
        "/bin/bash",
        "-l",
        "-m",
    ]
    logger.debug("Starting container with command: %s", shlex.join(startup_cmd))
    container = subprocess.Popen(
        startup_cmd,
        stdin=PIPE,
        stdout=PIPE,
        stderr=STDOUT,
        text=True,
        bufsize=1,  # line buffered
    )
    time.sleep(START_UP_DELAY)
    # try to read output from container setup (usually an error), timeout if no output
    try:
        with timeout(seconds=2):
            output = container.stdout.read()
            if output:
                logger.error(f"Unexpected container setup output: {output}")
    except TimeoutError:
        pass
    # Get the process IDs of the container
    # There should be at least a head process and possibly one child bash process
    bash_pids, other_pids = get_background_pids(container_obj)
    bash_pid = 1
    if len(bash_pids) == 1:
        bash_pid = bash_pids[0][0]
    elif len(bash_pids) > 1 or len(other_pids) > 0:
        raise RuntimeError(
            f"Detected alien processes attached or running. Please ensure that no other agents are running on this container. PIDs: {bash_pids}, {other_pids}"
        )
    return container, set(
        map(
            str,
            [
                bash_pid,
                1,
            ],
        )
    )
def get_container(ctr_name: str, image_name: str, persistent: bool = False) -> subprocess.Popen:
    """
    Get a container object for a given container name and image name

    Arguments:
        ctr_name (str): Name of container
        image_name (str): Name of image
        persistent (bool): Whether to use a persistent container or not
    Returns:
        Tuple of (Popen handle attached to the container's bash, set of shell PIDs)
        — note both helpers return a tuple, not a bare Popen.
    """
    if persistent:
        # Bug fix: the `persistent` flag was previously dropped here, so
        # "persistent" containers were still created with auto_remove=True.
        return _get_persistent_container(ctr_name, image_name, persistent=persistent)
    else:
        return _get_non_persistent_container(ctr_name, image_name)
def get_commit(api: GhApi, owner: str, repo: str, base_commit: str = None):
    """Return the commit object for *base_commit*, or the repo's most recent
    commit when no base commit is given."""
    if not base_commit:
        return api.repos.list_commits(owner, repo)[0]
    return api.repos.get_commit(owner, repo, base_commit)
class InvalidGithubURL(ValueError):
    """Raised when a string cannot be parsed as a GitHub issue URL."""
def parse_gh_issue_url(issue_url: str) -> Tuple[str, str, str]:
    """Return (owner, repo, issue number) extracted from a GitHub issue URL.

    Raises:
        InvalidGithubURL: when *issue_url* contains no GitHub issue URL.
    """
    match = GITHUB_ISSUE_URL_PATTERN.search(issue_url)
    if match is None:
        raise InvalidGithubURL(f"Invalid GitHub issue URL: {issue_url}")
    groups = match.groups()
    assert len(groups) == 3
    return tuple(groups)  # type: ignore
def get_instances(file_path: str, base_commit: str = None, split: str = None, token: str = None):
    """
    Getter function for handling json, jsonl files

    Arguments:
        file_path (str): Path to file, dataset directory, dataset name, or GitHub issue URL
        base_commit (str, optional): Commit sha; only valid with a GitHub issue URL
        split (str, optional): Dataset split to select (e.g. "test")
        token (str, optional): GitHub API token used when fetching an issue
    Returns:
        List of instances (or a datasets.Dataset)
    Raises:
        ValueError: if base_commit is combined with a non-GitHub path, or
            nothing could be loaded from file_path.
    """
    # If file_path is a directory, attempt load from disk
    if os.path.isdir(file_path):
        dataset_or_dict = load_from_disk(file_path)
        if isinstance(dataset_or_dict, dict):
            return dataset_or_dict[split]
        return dataset_or_dict
    # If file_path is a github issue url, fetch the issue and return a single instance
    if is_from_github_url(file_path):
        try:
            owner, repo, issue_number = parse_gh_issue_url(file_path)
        except InvalidGithubURL:
            # Looked like GitHub but did not parse; fall through to file loading.
            pass
        else:
            record = dict()
            api = GhApi(token=token)
            issue = api.issues.get(owner, repo, issue_number)
            title = issue.title if issue.title else ""
            body = issue.body if issue.body else ""
            text = f"{title}\n{body}\n"
            record["repo"] = f"{owner}/{repo}"
            record["base_commit"] = base_commit if base_commit else get_commit(api, owner, repo, base_commit).sha
            record["version"] = record["base_commit"][:7]
            record["problem_statement"] = text
            record["instance_id"] = f"{owner}__{repo}-i{issue_number}"
            return [
                record,
            ]
    elif base_commit is not None:
        raise ValueError("base_commit must be None if data_path is not a github issue url")
    # If file_path is a file, load the file (with-blocks fix leaked file handles)
    if file_path.endswith(".json"):
        with open(file_path) as fp:
            return json.load(fp)
    if file_path.endswith(".jsonl"):
        with open(file_path, "r") as fp:
            return [json.loads(x) for x in fp.readlines()]
    # Attempt load from HF datasets as a last resort
    try:
        return load_dataset(file_path, split=split)
    except Exception as err:
        # Bug fix: a bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # chaining the original error keeps the root cause visible.
        raise ValueError(
            f"Could not load instances from {file_path}. "
            "Please ensure --data_path is a GitHub URL, a SWE-bench HuggingFace dataset, or a JSON/JSONL file."
        ) from err

View file

@ -0,0 +1,245 @@
# @yaml
# signature: search_dir_and_preview <search_term> [<dir>]
# docstring: searches for search_term in all files in dir and give their code preview with line number if you think need a first look. The output will vary depending on the length of the search results, but the file path, line number & corresponding code or number of occurrences will always be output. If dir is not provided, searches in the current directory
# arguments:
# search_term:
# type: string
# description: the term to search for
# required: true
# dir:
# type: string
# description: the directory to search in (if not provided, searches in the current directory)
# required: false
search_dir_and_preview() {
if [ $# -eq 1 ]; then
local search_term="$1"
local dir="./"
elif [ $# -eq 2 ]; then
local search_term="$1"
if [ -d "$2" ]; then
local dir="$2"
else
echo "Directory $2 not found"
return
fi
else
echo "Usage: search_dir_and_preview <search_term> [<dir>]"
return
fi
dir=$(realpath "$dir")
local matches=$(find "$dir" -type f -path '*.py' -exec grep -nIH -- "$search_term" {} + | cut -d: -f1 | sort | uniq -c)
<<COMMENT
metches exmaple: 3 xx/xx/test_file.py
COMMENT
local matches_with_line=$(find "$dir" -type f -path '*.py' -exec grep -nIH -- "$search_term" {} + | sort | uniq -c)
<<COMMENT
matches_with_line example: 1 xx/xx/test_file.py:20: def func_test()
COMMENT
# if no matches, return
if [ -z "$matches" ]; then
echo "No matches found for \"$search_term\" in $dir"
return
fi
# Calculate total number of matches
local num_matches=$(echo "$matches" | awk '{sum+=$1} END {print sum}')
# calculate total number of files matched
local num_files=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}')
# if num_files is > 100, print an error
if [ $num_files -gt 100 ]; then
echo "More than $num_files files matched for \"$search_term\" in $dir. Please narrow your search."
return
fi
match_with_cnt=$(echo "$matches" | awk '{$2=$2; gsub(/^\.+\/+/, "./", $2); print $2 " ("$1" matches)"}')
<<COMMENT
match_with_cnt example: xx/xx/test_file.py (3 matches)
COMMENT
match_res=""
match_res+="Found $num_matches matches for \"$search_term\" in $dir:\n"
match_res+="$match_with_cnt\n"
match_res+="End of matches for \"$search_term\" in $dir"
match_line_res=""
<<COMMENT
match_line_res example: xx/xx/test_file.py
10: def test_func()
20: a = test_func()
COMMENT
match_line_res+="Found $num_matches matches for \"$search_term\" in $dir:\n"
# transform matches_with_line format into match_line_res
transform_res=$(echo "$matches_with_line" | awk -F ':' '
{
code="";
for (i=3; i<=NF; i++) {
code = code $i ":";
}
if (code != "") {
code=substr(code, 1, length(code)-1)
}
split($1, arr, " ");
file=arr[2];
line=$2;
if (file != current_file) {
if (current_file != "") {
print "";
}
print file;
current_file = file;
}
print line ":" code;
}
')
match_line_res+="$transform_res\n"
match_line_res+="End of matches for \"$search_term\" in $dir"
matched_files=$(find "$dir" -type f -path '*.py' -exec grep -nIH -- "$search_term" {} + | cut -d: -f1)
matched_lineno=$(find "$dir" -type f -path '*.py' -exec grep -nIH -- "$search_term" {} + | cut -d: -f2)
files_arr=($matched_files)
lineno_arr=($matched_lineno)
length=${#files_arr[@]}
preview_res=""
preview_res+="Found $num_matches matches for \"$search_term\" in $dir. Founded files and there code preview with line number are under below\n"
for (( idx=0; idx<$length; idx++ )); do
file_abs_path=${files_arr[$idx]}
lineno=${lineno_arr[$idx]}
# preview head 3 lines
lineno_sub=$(($lineno-3))
if (( $lineno_sub < 0 )); then
head_start_lineno=0
else
head_start_lineno=$lineno_sub
fi
head_content=$(sed -n "$(($head_start_lineno)),$(($lineno-1))p" "$file_abs_path" | nl -w 1 -ba -s ":" -v $head_start_lineno)
# preview tail 5+1 lines, including the `lineno` line
tail_content=$(sed -n "$(($lineno)),$(($lineno+5))p" "$file_abs_path" | nl -w 1 -ba -s ":" -v $lineno)
preview_res+="\nFounded #$idx code block in $file_abs_path\n"
preview_res+="$head_content\n"
preview_res+="$tail_content\n"
done
preview_res+="End of matches for \"$search_term\" in $dir"
preview_res_len=${#preview_res}
if [ $preview_res_len -gt 20000 ]; then
echo -e "$match_res"
elif [ $preview_res_len -gt 10000 ]; then
echo -e "$match_line_res"
else
echo -e "$preview_res"
fi
}
# @yaml
# signature: search_file <search_term> [<file>]
# docstring: searches for search_term in file. If file is not provided, searches in the current open file
# arguments:
#   search_term:
#     type: string
#     description: the term to search for
#     required: true
#   file:
#     type: string
#     description: the file to search in (if not provided, searches in the current open file)
#     required: false
search_file() {
    # Check if the first argument is provided
    if [ -z "$1" ]; then
        echo "Usage: search_file <search_term> [<file>]"
        return
    fi
    # Check if the second argument is provided
    if [ -n "$2" ]; then
        # Check if the provided argument is a valid file
        if [ -f "$2" ]; then
            local file="$2"  # Set file if valid
        else
            echo "Usage: search_file <search_term> [<file>]"
            echo "Error: File name $2 not found. Please provide a valid file name."
            return # Exit if the file is not valid
        fi
    else
        # Check if a file is open
        if [ -z "$CURRENT_FILE" ]; then
            echo "No file open. Use the open command first."
            return # Exit if no file is open
        fi
        local file="$CURRENT_FILE" # Set file to the current open file
    fi
    local search_term="$1"
    file=$(realpath "$file")
    # Use grep to directly get the desired formatted output
    local matches=$(grep -nH -- "$search_term" "$file")
    # Check if no matches were found
    if [ -z "$matches" ]; then
        echo "No matches found for \"$search_term\" in $file"
        return
    fi
    # Calculate total number of matches
    local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}')
    # calculate total number of lines matched
    # NOTE(review): cut -f1 selects the *filename* field of grep -nH output,
    # so num_lines counts distinct files (always 1 here), not lines — confirm.
    local num_lines=$(echo "$matches" | cut -d: -f1 | sort | uniq | wc -l | awk '{$1=$1; print $0}')
    # if num_lines is > 100, print an error
    if [ $num_lines -gt 100 ]; then
        echo "More than $num_lines lines matched for \"$search_term\" in $file. Please narrow your search."
        return
    fi
    # Print the total number of matches and the matches themselves
    echo "Found $num_matches matches for \"$search_term\" in $file:"
    # Re-print each matching line, prefixed with its number, in line order.
    echo "$matches" | cut -d: -f1-2 | sort -u -t: -k2,2n | while IFS=: read -r filename line_number; do
        echo "Line $line_number:$(sed -n "${line_number}p" "$file")"
    done
    echo "End of matches for \"$search_term\" in $file"
}
# @yaml
# signature: find_file <file_name> [<dir>]
# docstring: finds all files with the given name in dir. If dir is not provided, searches in the current directory
# arguments:
#   file_name:
#     type: string
#     description: the name of the file to search for
#     required: true
#   dir:
#     type: string
#     description: the directory to search in (if not provided, searches in the current directory)
#     required: false
# List every file named <file_name> under <dir> (default: current directory).
find_file() {
    # Exactly one or two arguments are accepted.
    if [ $# -lt 1 ] || [ $# -gt 2 ]; then
        echo "Usage: find_file <file_name> [<dir>]"
        return
    fi
    local file_name="$1"
    local dir="./"
    if [ $# -eq 2 ]; then
        if [ ! -d "$2" ]; then
            echo "Directory $2 not found"
            return
        fi
        dir="$2"
    fi
    dir=$(realpath "$dir")
    local matches=$(find "$dir" -type f -name "$file_name")
    # Nothing found: report and stop.
    if [ -z "$matches" ]; then
        echo "No matches found for \"$file_name\" in $dir"
        return
    fi
    # wc -l counts one path per line; awk trims surrounding whitespace.
    local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}')
    echo "Found $num_matches matches for \"$file_name\" in $dir:"
    echo "$matches" | awk '{print $0}'
}

View file

@ -0,0 +1,20 @@
#!/bin/bash
# Entry point sourced by the Bash tool: installs the linter used by `edit`,
# then loads the SWE-agent shell commands into the current shell.
pip install flake8
# Default Mode from SWE-Bench
# https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml#L103-L106
SCRIPT_PATH="${BASH_SOURCE[0]}" # use BASH_SOURCE to avoid the influence of `source *.sh which cause CUR_DIR=/bin`
CUR_DIR=$(dirname $(readlink -f $SCRIPT_PATH))
REPO_ROOT_DIR=$CUR_DIR"/../../.."
# Load shared viewer state first, then the command implementations.
source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/_setup_default_env.sh
# make _split_string (py) available
export PATH=$PATH:$REPO_ROOT_DIR/metagpt/tools/swe_agent_commands
source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/defaults.sh
source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/search.sh
source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/edit_linting.sh
# Work directory used by edit() for backups and by submit() for test.patch.
export SWE_CMD_WORK_DIR="$REPO_ROOT_DIR/workspace/swe_agent_workdir"
#sudo chmod 777 $REPO_ROOT_DIR/workspace/swe_agent_workdir

View file

@ -0,0 +1,36 @@
from pathlib import Path
import numpy as np
from datasets import load_dataset, load_from_disk
def extract_patch(command_output):
    """Return the portion of *command_output* starting at the first
    "diff --git" line (empty string when no diff is present)."""
    all_lines = command_output.split("\n")
    for idx, text in enumerate(all_lines):
        if text.startswith("diff --git"):
            # Everything from the first diff header onward is the patch.
            return "\n".join(all_lines[idx:])
    return ""
def load_hf_dataset(dataset_name_or_path: str, cache_dir, split: str = "test", existing_ids: list = None):
    """Load a SWE-bench style dataset from disk or the HuggingFace hub.

    Args:
        dataset_name_or_path: Local `save_to_disk` directory or HF dataset name.
        cache_dir: Cache directory passed to `load_dataset` for hub downloads.
        split: Split to select; must exist in the loaded dataset.
        existing_ids: Instance ids to filter OUT (e.g. already-processed items).
            Fix: the default was a mutable `[]`; it is now None (equivalent —
            both are falsy and the argument is never mutated).

    Returns:
        The selected (and possibly filtered) `datasets.Dataset` split.

    Raises:
        ValueError: if *split* is not present in the dataset.
    """
    if Path(dataset_name_or_path).exists():
        dataset = load_from_disk(dataset_name_or_path)
    else:
        dataset = load_dataset(dataset_name_or_path, cache_dir=cache_dir)
    print(dataset)
    if split not in dataset:
        raise ValueError(f"Invalid split {split} for dataset {dataset_name_or_path}")
    dataset = dataset[split]
    # Fix: removed a dead `np.array(list(map(len, ...)))` expression whose
    # result was discarded.
    if existing_ids:
        dataset = dataset.filter(
            lambda x: x["instance_id"] not in existing_ids,
            desc="Filtering out existing ids",
            load_from_cache_file=False,
        )
    return dataset