From 9b11ac5c34f00d57b234783e4eef0fedf669ebb6 Mon Sep 17 00:00:00 2001 From: seeker Date: Fri, 28 Jun 2024 21:05:46 +0800 Subject: [PATCH 01/10] add: SWE Agent --- metagpt/const.py | 3 + metagpt/prompts/di/swe.py | 144 +++++++ metagpt/roles/di/role_zero.py | 62 ++- metagpt/roles/di/swe.py | 74 ++++ metagpt/tools/libs/terminal.py | 95 ++++- .../swe_agent_commands/_setup_default_env.sh | 20 + .../tools/swe_agent_commands/_split_string | 19 + .../tools/swe_agent_commands/_split_string.py | 19 + metagpt/tools/swe_agent_commands/defaults.sh | 193 ++++++++++ .../tools/swe_agent_commands/edit_linting.sh | 165 ++++++++ .../swe_agent_commands/execute_env_utils.py | 359 ++++++++++++++++++ metagpt/tools/swe_agent_commands/search.sh | 245 ++++++++++++ .../tools/swe_agent_commands/setup_default.sh | 20 + .../swe_agent_commands/swe_agent_utils.py | 36 ++ metagpt/utils/token_counter.py | 4 + requirements.txt | 2 +- tests/metagpt/roles/di/run_swe.py | 136 +++++++ 17 files changed, 1575 insertions(+), 21 deletions(-) create mode 100644 metagpt/prompts/di/swe.py create mode 100644 metagpt/roles/di/swe.py create mode 100644 metagpt/tools/swe_agent_commands/_setup_default_env.sh create mode 100755 metagpt/tools/swe_agent_commands/_split_string create mode 100755 metagpt/tools/swe_agent_commands/_split_string.py create mode 100644 metagpt/tools/swe_agent_commands/defaults.sh create mode 100644 metagpt/tools/swe_agent_commands/edit_linting.sh create mode 100644 metagpt/tools/swe_agent_commands/execute_env_utils.py create mode 100644 metagpt/tools/swe_agent_commands/search.sh create mode 100644 metagpt/tools/swe_agent_commands/setup_default.sh create mode 100644 metagpt/tools/swe_agent_commands/swe_agent_utils.py create mode 100644 tests/metagpt/roles/di/run_swe.py diff --git a/metagpt/const.py b/metagpt/const.py index 6e823d56c..c78a22641 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -149,3 +149,6 @@ METAGPT_REPORTER_DEFAULT_URL = os.environ.get("METAGPT_REPORTER_URL", "") # Metadata defines AGENT = "agent" + +# SWE agent +SWE_SETUP_PATH = METAGPT_ROOT / "metagpt/tools/swe_agent_commands/setup_default.sh" diff --git a/metagpt/prompts/di/swe.py b/metagpt/prompts/di/swe.py new file mode 100644 index 000000000..15e917e11 --- /dev/null +++ b/metagpt/prompts/di/swe.py @@ -0,0 +1,144 @@ +SWE_AGENT_SYSTEM_TEMPLATE = """ +SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface. + +The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time. + +Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. Pay attention to the original indentation when replacing the function. +If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. + +Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them. + +Your output should always contain a section of reasoning and a command described in JSON format. The command must always contain command_name and args fields. The command_name field should always be Bash.run, and the args field should always include a cmd field containing the bash command, as shown in the example below: + +First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. +```json +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "ls -a" + }} +}} + +``` + + +You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. +If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. +You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above. +You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors. + +However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. +""" + +MINIMAL_EXAMPLE = """ +## Example of a actions trajectory +User Requirement and Issue: Fix the bug in the repo. Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed. + +### Locate issue(Require): Locate the issue in the code by searching for the relevant file, function, or class and open the file to view the code. +cd /workspace/django__django_3.0 +-> +search_dir_and_preview ASCIIUsernameValidator +-> +open /workspace/django__django_3.0/django/contrib/auth/validators.py +-> +### Fix the Bug(Require): Fix the bug in the code by editing the relevant function, class or code snippet. +edit 10:20 < +### Submit the Changes(Require): Submit the changes to the repository. +submit +""" + + +IMPORTANT_TIPS = """ +1. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! + +2. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. + +3. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. + +4. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. + +5. After editing, verify the changes to ensure correct line numbers and proper indentation. Adhere to PEP8 standards for Python code. + +6. NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! Ensuring the code adheres to PEP8 standards. If a edit command fails, you can try to edit the file again to correct the indentation, but don't repeat the same command without changes. + +7. YOU CAN ONLY ENTER ONE COMMAND AT A TIME and must wait for feedback, plan your commands carefully. + +8. You cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python .py`. + +9. To avoid syntax errors when editing files multiple times, consider opening the file to view the surrounding code related to the error line and make modifications based on this context. + +10. When using the `edit` command, remember it operates within a closed range. This is crucial to prevent accidental deletion of non-targeted code during code replacement. + +11. Ensure to observe the currently open file and the current working directory, which is displayed right after the open file. The open file might be in a different directory than the working directory. Remember, commands like 'create' open files and might alter the current open file. + +12. Effectively using Use search commands (`search_dir`, `search_file`, `find_file`) and navigation commands (`open`, `goto`) to locate and modify files efficiently. Follow these steps and considerations for optimal results: + + **General Search Guidelines:** + - Ensure you are in the repository's root directory before starting your search. + - Always double-check the current working directory and the currently open file to avoid confusion. + - Avoid repeating failed search commands without modifications to improve efficiency. + + **Strategies for Searching and Navigating Files:** + + 1. **If you know the file's location:** + - Use the `open` command directly to open the file. + - Use `search_file` to find the `search_term` within the currently open file. + - Alternatively, use the `goto` command to jump to the specified line. + - **Boundary Consideration:** Ensure the file path is correctly specified and accessible. + + 2. **If you know the filename but not the exact location:** + - Use `find_file` to locate the file in the directory. + - Use `open` to open the file once located. + - Use `search_file` to find the `search_term` within the file. + - Use `goto` to jump to the specified line if needed. + - **Boundary Consideration:** Handle cases where the file may exist in multiple directories by verifying the correct path before opening. + + 3. **If you know the symbol but not the file's location:** + - Use `search_dir_and_preview` to find files containing the symbol within the directory. + - Review the search results to identify the relevant file(s). + - Use `open` to open the identified file. + - Use `search_file` to locate the `search_term` within the open file. + - Use `goto` to jump to the specified line. + - **Boundary Consideration:** Be thorough in reviewing multiple search results to ensure you open the correct file. Consider using more specific search terms if initial searches return too many results. + + **Search Tips:** + - The `` for `search_dir_and_preview`, `find_file`, or `search_file` should be an existing class name, function name, or file name. + - Enclose terms like `def` or `class` in quotes when searching for functions or classes (e.g., `search_dir_and_preview 'def apow'` or `search_file 'class Pow'`). + - Use wildcard characters (`*`, `?`) in search terms to broaden or narrow down your search scope. + - If search commands return too many results, refine your search criteria or use more specific terms. + - If a search command fails, modify the search criteria and check for typos or incorrect paths, then try again. + - Based on feedback of observation or bash command in trajectory to guide adjustments in your search strategy. + +13. If the task results in succeed, fail, or NO PROGRESS, output `submit`. +""" + +NEXT_STEP_TEMPLATE = f""" +# Example of Output +These examples are provided to demonstrate the output style that expected to be several stages including Locate issue, Fix the bug, Test the fix(Optional), and Submit the changes. It is included to show you how to correctly use the interface. You do not need to follow exactly what is done in the Example. The separator is "-----". +----- Beginning of Examples ----- +{MINIMAL_EXAMPLE} +----- End of Examples ----- + +# IMPORTANT TIPS +{IMPORTANT_TIPS} + +# Output Next Step +The current bash state is: +(Open file: {{open_file}}) +(Current directory: {{working_dir}}) + +Avoid repeating the same command. Instead, please think about the current situation and provide the next bash command to execute in JSON format:" + +""" diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 906c5583c..4edbb8e9c 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -4,14 +4,18 @@ import inspect import json import re import traceback -from typing import Callable, Literal, Tuple +from typing import Callable, Dict, List, Literal, Tuple, Union from pydantic import model_validator from metagpt.actions import Action from metagpt.actions.di.run_command import RunCommand from metagpt.logs import logger -from metagpt.prompts.di.role_zero import CMD_PROMPT, ROLE_INSTRUCTION, JSON_REPAIR_PROMPT +from metagpt.prompts.di.role_zero import ( + CMD_PROMPT, + JSON_REPAIR_PROMPT, + ROLE_INSTRUCTION, +) from metagpt.roles import Role from metagpt.schema import AIMessage, Message, UserMessage from metagpt.strategy.experience_retriever import DummyExpRetriever, ExpRetriever @@ -21,8 +25,8 @@ from metagpt.tools.libs.editor import Editor from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.tools.tool_registry import register_tool from metagpt.utils.common import CodeParser +from metagpt.utils.repair_llm_raw_output import RepairType, repair_llm_raw_output from metagpt.utils.report import ThoughtReporter -from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType @register_tool(include_functions=["ask_human", "reply_to_human"]) @@ -163,25 +167,15 @@ class RoleZero(Role): if self.use_fixed_sop: return await super()._act() - try: - commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) - commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON)) - except json.JSONDecodeError as e: - commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) - commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) - except Exception as e: - tb = traceback.format_exc() - print(tb) - error_msg = UserMessage(content=str(e)) - self.rc.memory.add(error_msg) + commands, ok = await self._get_commands() + if not ok: + error_msg = commands return error_msg - - # 为了对LLM不按格式生成进行容错 - if isinstance(commands, dict): - commands = commands["commands"] if "commands" in commands else [commands] - + logger.info(f"Commands: \n{commands}") outputs = await self._run_commands(commands) + logger.info(f"Commands outputs: \n{outputs}") self.rc.memory.add(UserMessage(content=outputs)) + return AIMessage( content=f"Complete run with outputs: {outputs}", sent_from=self.name, @@ -208,6 +202,36 @@ class RoleZero(Role): actions_taken += 1 return rsp # return output from the last action + async def _get_commands(self) -> Tuple[Union[UserMessage, List[Dict]], bool]: + """Retrieves commands from the Large Language Model (LLM). + + This function attempts to retrieve a list of commands from the LLM by + processing the response (`self.command_rsp`). It handles potential errors + during parsing and LLM response formats. + + Returns: + A tuple containing: + - A `UserMessage` object or dict representing the commands. + - A boolean flag indicating success (True) or failure (False). + """ + try: + commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) + commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON)) + except json.JSONDecodeError: + commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) + commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) + except Exception as e: + tb = traceback.format_exc() + logger.debug(tb) + error_msg = UserMessage(content=str(e)) + self.rc.memory.add(error_msg) + return error_msg, False + + # 为了对LLM不按格式生成进行容错 + if isinstance(commands, dict): + commands = commands["commands"] if "commands" in commands else [commands] + return commands, True + async def _run_commands(self, commands) -> str: outputs = [] for cmd in commands: diff --git a/metagpt/roles/di/swe.py b/metagpt/roles/di/swe.py new file mode 100644 index 000000000..1fd1324cf --- /dev/null +++ b/metagpt/roles/di/swe.py @@ -0,0 +1,74 @@ +import json +import os + +from pydantic import Field + +from metagpt.logs import logger +from metagpt.prompts.di.swe import ( + MINIMAL_EXAMPLE, + NEXT_STEP_TEMPLATE, + SWE_AGENT_SYSTEM_TEMPLATE, +) +from metagpt.roles.di.role_zero import RoleZero +from metagpt.tools.libs.terminal import Bash +from metagpt.tools.swe_agent_commands.swe_agent_utils import extract_patch + + +class SWE(RoleZero): + name: str = "SweAgent" + profile: str = "Software Engineer" + goal: str = "Resolve GitHub issue" + _bash_window_size: int = 100 + _system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE + system_msg: list[str] = [_system_msg.format(WINDOW=_bash_window_size)] + _instruction: str = NEXT_STEP_TEMPLATE + tools: list[str] = ["Bash"] + terminal: Bash = Field(default_factory=Bash, exclude=True) + output_diff: str = "" + max_react_loop: int = 30 + + async def _think(self) -> bool: + self._set_system_msg() + self._format_instruction() + res = await super()._think() + await self._handle_action() + return res + + def _set_system_msg(self): + if os.getenv("WINDOW"): + self._bash_window_size = int(os.getenv("WINDOW")) + self.system_msg = [self._system_msg.format(WINDOW=self._bash_window_size)] + + def _format_instruction(self): + state_output = self.terminal.run("state") + bash_state = json.loads(state_output) + + self.instruction = self._instruction.format( + WINDOW=self._bash_window_size, examples=MINIMAL_EXAMPLE, **bash_state + ).strip() + + return self.instruction + + async def _handle_action(self): + commands, ok = await self._get_commands() + if not ok: + return + for cmd in commands: + if "submit" not in cmd.get("args", {}).get("cmd", ""): + return + try: + # Generate patch by git diff + diff_output = self.terminal.run("git diff") + clear_diff = extract_patch(diff_output) + logger.info(f"Diff output: \n{clear_diff}") + if clear_diff: + self.output_diff = clear_diff + + except Exception as e: + logger.error(f"Error during submission: {e}") + + def _update_tool_execution(self): + self.tool_execution_map.update({"Bash.run": self.terminal.run}) + + def _retrieve_experience(self) -> str: + return MINIMAL_EXAMPLE diff --git a/metagpt/tools/libs/terminal.py b/metagpt/tools/libs/terminal.py index faf2893a7..938eadff4 100644 --- a/metagpt/tools/libs/terminal.py +++ b/metagpt/tools/libs/terminal.py @@ -2,6 +2,7 @@ import subprocess import threading from queue import Queue +from metagpt.const import SWE_SETUP_PATH from metagpt.tools.tool_registry import register_tool from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter @@ -26,7 +27,7 @@ class Terminal: stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - executable="/bin/bash" + executable="/bin/bash", ) self.stdout_queue = Queue() self.observer = TerminalReporter() @@ -129,3 +130,95 @@ class Terminal: self.process.stdin.close() self.process.terminate() self.process.wait() + + +@register_tool(include_functions=["run"]) +class Bash(Terminal): + """ + A class to run bash commands directly and provides custom shell functions. + """ + + def __init__(self): + """init""" + super().__init__() + setup_cmd = f"source {SWE_SETUP_PATH}" + self.run_command(setup_cmd) + + def run(self, cmd) -> str: + """ + Executes a bash command. + + Args: + cmd (str): The bash command to execute. + + Returns: + str: The output of the command. + + This method allows for executing standard bash commands as well as + utilizing several custom shell functions defined in the environment. + + Custom Shell Functions: + + - open [] + Opens the file at the given path in the editor. If line_number is provided, + the window will move to include that line. + Arguments: + path (str): The path to the file to open. + line_number (int, optional): The line number to move the window to. + If not provided, the window will start at the top of the file. + + - goto + Moves the window to show . + Arguments: + line_number (int): The line number to move the window to. + + - scroll_down + Moves the window down {WINDOW} lines. + + - scroll_up + Moves the window up {WINDOW} lines. + + - create + Creates and opens a new file with the given name. + Arguments: + filename (str): The name of the file to create. + + - submit + Submits your current code and terminates the session. + + - search_dir_and_preview [] + Searches for search_term in all files in dir and gives their code preview + with line numbers. If dir is not provided, searches in the current directory. + Arguments: + search_term (str): The term to search for. + dir (str, optional): The directory to search in. Defaults to the current directory. + + - search_file [] + Searches for search_term in file. If file is not provided, searches in the current open file. + Arguments: + search_term (str): The term to search for. + file (str, optional): The file to search in. Defaults to the current open file. + + - find_file [] + Finds all files with the given name in dir. If dir is not provided, searches in the current directory. + Arguments: + file_name (str): The name of the file to search for. + dir (str, optional): The directory to search in. Defaults to the current directory. + + - edit : < + EOF + Line numbers start from 1. Replaces lines through (inclusive) with the given text in the open file. + The replacement text is terminated by a line with only EOF on it. All of the will be entered, so make + sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system + detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error + message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same + error message again. All code modifications made via the 'edit' command must strictly follow the PEP8 standard. + Arguments: + start_line (int): The line number to start the edit at, starting from 1. + end_line (int): The line number to end the edit at (inclusive), starting from 1. + replacement_text (str): The text to replace the current selection with, must conform to PEP8 standards. + + Note: Make sure to use these functions as per their defined arguments and behaviors. + """ + return self.run_command(cmd) diff --git a/metagpt/tools/swe_agent_commands/_setup_default_env.sh b/metagpt/tools/swe_agent_commands/_setup_default_env.sh new file mode 100644 index 000000000..8fb4a379e --- /dev/null +++ b/metagpt/tools/swe_agent_commands/_setup_default_env.sh @@ -0,0 +1,20 @@ +# _setup_default_env.sh +# Default Mode from SWE-Bench +# https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml + +export WINDOW=100 +export OVERLAP=2 +export CURRENT_LINE=0 +export CURRENT_FILE='' +export SEARCH_RESULTS=() +export SEARCH_FILES=() +export SEARCH_INDEX=0 + +state() { + local working_dir="$PWD" + if [ ! -e "$CURRENT_FILE" ]; then + echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}' + else + echo '{"open_file": "'$(realpath "$CURRENT_FILE")'", "working_dir": "'$working_dir'"}' + fi +} diff --git a/metagpt/tools/swe_agent_commands/_split_string b/metagpt/tools/swe_agent_commands/_split_string new file mode 100755 index 000000000..ecc363e71 --- /dev/null +++ b/metagpt/tools/swe_agent_commands/_split_string @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import sys + + +def print_flake8_output(input_string, show_line_numbers=False): + for value in input_string.split("\n"): + parts = value.split() + if not show_line_numbers: + print(f"- {' '.join(parts[1:])}") + else: + line_nums = ":".join(parts[0].split(":")[1:]) + print(f"- {line_nums} {' '.join(parts[1:])}") + + +if __name__ == "__main__": + lint_output = sys.argv[1] + print_flake8_output(lint_output) diff --git a/metagpt/tools/swe_agent_commands/_split_string.py b/metagpt/tools/swe_agent_commands/_split_string.py new file mode 100755 index 000000000..ecc363e71 --- /dev/null +++ b/metagpt/tools/swe_agent_commands/_split_string.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import sys + + +def print_flake8_output(input_string, show_line_numbers=False): + for value in input_string.split("\n"): + parts = value.split() + if not show_line_numbers: + print(f"- {' '.join(parts[1:])}") + else: + line_nums = ":".join(parts[0].split(":")[1:]) + print(f"- {line_nums} {' '.join(parts[1:])}") + + +if __name__ == "__main__": + lint_output = sys.argv[1] + print_flake8_output(lint_output) diff --git a/metagpt/tools/swe_agent_commands/defaults.sh b/metagpt/tools/swe_agent_commands/defaults.sh new file mode 100644 index 000000000..880920711 --- /dev/null +++ b/metagpt/tools/swe_agent_commands/defaults.sh @@ -0,0 +1,193 @@ +_print() { + local total_lines=$(awk 'END {print NR}' $CURRENT_FILE) + echo "[File: $(realpath $CURRENT_FILE) ($total_lines lines total)]" + lines_above=$(jq -n "$CURRENT_LINE - $WINDOW/2" | jq '[0, .] | max | floor') + lines_below=$(jq -n "$total_lines - $CURRENT_LINE - $WINDOW/2" | jq '[0, .] | max | round') + if [ $lines_above -gt 0 ]; then + echo "($lines_above more lines above)" + fi + cat $CURRENT_FILE | grep -n $ | head -n $(jq -n "[$CURRENT_LINE + $WINDOW/2, $WINDOW/2] | max | floor") | tail -n $(jq -n "$WINDOW") + if [ $lines_below -gt 0 ]; then + echo "($lines_below more lines below)" + fi +} + +_constrain_line() { + if [ -z "$CURRENT_FILE" ] + then + echo "No file open. Use the open command first." + return + fi + local max_line=$(awk 'END {print NR}' $CURRENT_FILE) + local half_window=$(jq -n "$WINDOW/2" | jq 'floor') + export CURRENT_LINE=$(jq -n "[$CURRENT_LINE, $max_line - $half_window] | min") + export CURRENT_LINE=$(jq -n "[$CURRENT_LINE, $half_window] | max") +} + +# @yaml +# signature: open [] +# docstring: opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line +# arguments: +# path: +# type: string +# description: the path to the file to open +# required: true +# line_number: +# type: integer +# description: the line number to move the window to (if not provided, the window will start at the top of the file) +# required: false +open() { + if [ -z "$1" ] + then + echo "Usage: open " + return + fi + # Check if the second argument is provided + if [ -n "$2" ]; then + # Check if the provided argument is a valid number + if ! [[ $2 =~ ^[0-9]+$ ]]; then + echo "Usage: open []" + echo "Error: must be a number" + return # Exit if the line number is not valid + fi + local max_line=$(awk 'END {print NR}' $1) + if [ $2 -gt $max_line ]; then + echo "Warning: ($2) is greater than the number of lines in the file ($max_line)" + echo "Warning: Setting to $max_line" + local line_number=$(jq -n "$max_line") # Set line number to max if greater than max + elif [ $2 -lt 1 ]; then + echo "Warning: ($2) is less than 1" + echo "Warning: Setting to 1" + local line_number=$(jq -n "1") # Set line number to 1 if less than 1 + else + local OFFSET=$(jq -n "$WINDOW/6" | jq 'floor') + local line_number=$(jq -n "[$2 + $WINDOW/2 - $OFFSET, 1] | max | floor") + fi + else + local line_number=$(jq -n "$WINDOW/2") # Set default line number if not provided + fi + + if [ -f "$1" ]; then + export CURRENT_FILE=$(realpath $1) + export CURRENT_LINE=$line_number + _constrain_line + _print + elif [ -d "$1" ]; then + echo "Error: $1 is a directory. You can only open files. Use cd or ls to navigate directories." + else + echo "File $1 not found" + fi +} + +# @yaml +# signature: goto +# docstring: moves the window to show +# arguments: +# line_number: +# type: integer +# description: the line number to move the window to +# required: true +goto() { + if [ $# -gt 1 ]; then + echo "goto allows only one line number at a time." + return + fi + if [ -z "$CURRENT_FILE" ] + then + echo "No file open. Use the open command first." + return + fi + if [ -z "$1" ] + then + echo "Usage: goto " + return + fi + if ! [[ $1 =~ ^[0-9]+$ ]] + then + echo "Usage: goto " + echo "Error: must be a number" + return + fi + local max_line=$(awk 'END {print NR}' $CURRENT_FILE) + if [ $1 -gt $max_line ] + then + echo "Error: must be less than or equal to $max_line" + return + fi + local OFFSET=$(jq -n "$WINDOW/6" | jq 'floor') + export CURRENT_LINE=$(jq -n "[$1 + $WINDOW/2 - $OFFSET, 1] | max | floor") + _constrain_line + _print +} + +# @yaml +# signature: scroll_down +# docstring: moves the window down {WINDOW} lines +scroll_down() { + if [ -z "$CURRENT_FILE" ] + then + echo "No file open. Use the open command first." + return + fi + export CURRENT_LINE=$(jq -n "$CURRENT_LINE + $WINDOW - $OVERLAP") + _constrain_line + _print +} + +# @yaml +# signature: scroll_up +# docstring: moves the window down {WINDOW} lines +scroll_up() { + if [ -z "$CURRENT_FILE" ] + then + echo "No file open. Use the open command first." + return + fi + export CURRENT_LINE=$(jq -n "$CURRENT_LINE - $WINDOW + $OVERLAP") + _constrain_line + _print +} + +# @yaml +# signature: create +# docstring: creates and opens a new file with the given name +# arguments: +# filename: +# type: string +# description: the name of the file to create +# required: true +create() { + if [ -z "$1" ]; then + echo "Usage: create " + return + fi + + # Check if the file already exists + if [ -e "$1" ]; then + echo "Error: File '$1' already exists." + open "$1" + return + fi + + # Create the file an empty new line + printf "\n" > "$1" + # Use the existing open command to open the created file + open "$1" +} + +# @yaml +# signature: submit +# docstring: submits your current code and terminates the session +submit() { + # Check if the patch file exists and is non-empty + if [ -s "$SWE_CMD_WORK_DIR/test.patch" ]; then + # Apply the patch in reverse + git apply -R < "$SWE_CMD_WORK_DIR/test.patch" + fi + + git add -A + git diff --cached > model.patch + echo "<>" +} diff --git a/metagpt/tools/swe_agent_commands/edit_linting.sh b/metagpt/tools/swe_agent_commands/edit_linting.sh new file mode 100644 index 000000000..e6d675ada --- /dev/null +++ b/metagpt/tools/swe_agent_commands/edit_linting.sh @@ -0,0 +1,165 @@ +# @yaml +# signature: |- +# edit : < +# EOF +# docstring: Line numbers start from 1. Replaces lines through (inclusive) with the given text in the open file. The replacement text is terminated by a line with only EOF on it. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. All code modifications made via the 'edit' command must strictly follow the PEP8 standard. +# end_name: EOF +# arguments: +# start_line: +# type: integer +# description: the line number to start the edit at, start from 1. +# required: true +# end_line: +# type: integer +# description: the line number to end the edit at (inclusive), start from 1. +# required: true +# replacement_text: +# type: string +# description: the text to replace the current selection with must conform to PEP8 standards. +# required: true +edit() { + if [ -z "$CURRENT_FILE" ] + then + echo 'No file open. Use the `open` command first.' + return + fi + + local start_line="$(echo $1: | cut -d: -f1)" + local end_line="$(echo $1: | cut -d: -f2)" + + if [ -z "$start_line" ] || [ -z "$end_line" ] + then + echo "Usage: edit :" + return + fi + + local re='^[0-9]+$' + if ! [[ $start_line =~ $re ]]; then + echo "Usage: edit :" + echo "Error: start_line must be a number" + return + fi + if ! [[ $end_line =~ $re ]]; then + echo "Usage: edit :" + echo "Error: end_line must be a number" + return + fi + + # Run linter for original file + if [[ $CURRENT_FILE == *.py ]]; then + original_lint_output=$(flake8 --isolated --select=F821,F822,F831,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1) + else + # do nothing + original_lint_output="" + fi + + + # Bash array starts at 0, so let's adjust + local start_line=$((start_line - 1)) + local end_line=$((end_line)) + + local line_count=0 + local replacement=() + while IFS= read -r line + do + replacement+=("$line") + ((line_count++)) + done + + # Create a backup of the current file + cp "$CURRENT_FILE" "$SWE_CMD_WORK_DIR/$(basename "$CURRENT_FILE")_backup" + + # Read the file line by line into an array + mapfile -t lines < "$CURRENT_FILE" + local new_lines=("${lines[@]:0:$start_line}" "${replacement[@]}" "${lines[@]:$((end_line))}") + # Write the new stuff directly back into the original file + printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE" + + # Run linter + if [[ $CURRENT_FILE == *.py ]]; then + lint_output=$(flake8 --isolated --select=F821,F822,F831,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1) + else + # do nothing + lint_output="" + fi + + # Create temporary files + temp_original=$(mktemp) + temp_modified=$(mktemp) + + # Remove line numbers and save cleaned outputs to temporary files + echo "$original_lint_output" | sed 's/:[0-9]\+:[0-9]\+:/:LINE:COL:/g' > "$temp_original" + echo "$lint_output" | sed 's/:[0-9]\+:[0-9]\+:/:LINE:COL:/g' > "$temp_modified" + + + # Compare the temporary files + if cmp -s "$temp_original" "$temp_modified"; then + lint_output="" + else + echo "Linter output for the original file:" + cat "$temp_original" + # print linter result + echo "Linter output for the modified file:" + cat "$temp_modified" + fi + + # Clean up temporary files + rm "$temp_original" "$temp_modified" + + # if there is no output, then the file is good + if [ -z "$lint_output" ]; then + export CURRENT_LINE=$start_line + _constrain_line + _print + + echo "File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary." + else + echo "Your proposed edit has introduced new syntax error(s). Please understand the fixes and retry your edit command." + echo "" + echo "ERRORS:" + _split_string "$lint_output" + echo "" + + # Save original values + original_current_line=$CURRENT_LINE + original_window=$WINDOW + + # Update values + export CURRENT_LINE=$(( (line_count / 2) + start_line )) # Set to "center" of edit + export WINDOW=$((line_count + 10)) # Show +/- 5 lines around edit + + echo "This is how your edit would have looked if applied" + echo "-------------------------------------------------" + _constrain_line + _print + echo "-------------------------------------------------" + echo "" + + + # Restoring CURRENT_FILE to original contents. + cp "$SWE_CMD_WORK_DIR/$(basename "$CURRENT_FILE")_backup" "$CURRENT_FILE" + + export CURRENT_LINE=$(( ((end_line - start_line + 1) / 2) + start_line )) + export WINDOW=$((end_line - start_line + 10)) + + echo "This is the original code before your edit" + echo "-------------------------------------------------" + _constrain_line + _print + echo "-------------------------------------------------" +# + + # Restore original values + export CURRENT_LINE=$original_current_line + export WINDOW=$original_window + + echo "Your changes have NOT been applied. Please fix your edit command and try again." + echo "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code." + echo "DO NOT re-run the same failed edit command. Running it again will lead to the same error." + fi + + + # Remove backup file + rm -f "$SWE_CMD_WORK_DIR/$(basename "$CURRENT_FILE")_backup" +} diff --git a/metagpt/tools/swe_agent_commands/execute_env_utils.py b/metagpt/tools/swe_agent_commands/execute_env_utils.py new file mode 100644 index 000000000..8fbc616fa --- /dev/null +++ b/metagpt/tools/swe_agent_commands/execute_env_utils.py @@ -0,0 +1,359 @@ +import json +import os +import re +import select +import shlex +import subprocess +import tarfile +import tempfile +import threading +import time +import traceback +from io import BytesIO +from subprocess import PIPE, STDOUT +from typing import Tuple + +import docker +from datasets import load_dataset, load_from_disk +from ghapi.all import GhApi + +from metagpt.logs import logger + +LOGGER_NAME = "intercode" +START_UP_DELAY = 5 +TIMEOUT_DURATION = 25 +GITHUB_ISSUE_URL_PATTERN = re.compile(r"github\.com\/(.*?)\/(.*?)\/issues\/(\d+)") + + +def is_from_github_url(data_path: str): + return GITHUB_ISSUE_URL_PATTERN.search(data_path) is not None + + +def copy_file_to_container(container, contents, container_path): + """ + Copies a given string into a Docker container at a specified path. + + Args: + - container: Docker SDK container object. + - contents: The string to copy into the container. + - container_path: The path inside the container where the string should be copied to. + + Returns: + - None + """ + temp_file_name = None + + try: + # Create a temporary file + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + temp_file_name = temp_file.name + # Write the string to the temporary file and ensure it's written to disk + temp_file.write(contents.encode("utf-8")) + temp_file.flush() + os.fsync(temp_file.fileno()) + + # Create a TAR archive in memory containing the temporary file + with tempfile.NamedTemporaryFile(): + with open(temp_file_name, "rb") as temp_file: + # Prepare the TAR archive + with BytesIO() as tar_stream: + with tarfile.open(fileobj=tar_stream, mode="w") as tar: + tar_info = tarfile.TarInfo(name=os.path.basename(container_path)) + tar_info.size = os.path.getsize(temp_file_name) + tar.addfile(tarinfo=tar_info, fileobj=temp_file) + tar_stream.seek(0) + # Copy the TAR stream to the container + container.put_archive(path=os.path.dirname(container_path), data=tar_stream.read()) + + except Exception as e: + logger.error(f"An error occurred: {e}") + logger.error(traceback.format_exc()) + finally: + # Cleanup: Remove the temporary file if it was created + if temp_file_name and os.path.exists(temp_file_name): + os.remove(temp_file_name) + + +def read_with_timeout(container, pid_func, timeout_duration): + """ + Read data from a subprocess with a timeout. + This function uses a file descriptor to read data from the subprocess in a non-blocking way. + + Args: + container (subprocess.Popen): The subprocess container. + pid_func (function): A function that returns a list of process IDs (except the PID of the main process). + timeout_duration (int): The timeout duration in seconds. + + Returns: + str: The data read from the subprocess, stripped of trailing newline characters. + + Raises: + TimeoutError: If the timeout duration is reached while reading from the subprocess. + """ + buffer = b"" + fd = container.stdout.fileno() + end_time = time.time() + timeout_duration + + while time.time() < end_time: + pids = pid_func() + if len(pids) > 0: + # There are still PIDs running + time.sleep(0.05) + continue + ready_to_read, _, _ = select.select([fd], [], [], 0.1) + if ready_to_read: + data = os.read(fd, 4096) + if data: + buffer += data + else: + # No more data to read + break + time.sleep(0.05) # Prevents CPU hogging + + if container.poll() is not None: + raise RuntimeError("Subprocess exited unexpectedly.\nCurrent buffer: {}".format(buffer.decode())) + if time.time() >= end_time: + raise TimeoutError( + "Timeout reached while reading from subprocess.\nCurrent buffer: {}\nRunning PIDs: {}".format( + buffer.decode(), pids + ) + ) + return buffer.decode() + + +class timeout: + def __init__(self, seconds=TIMEOUT_DURATION, error_message="Timeout"): + self.seconds = seconds + self.error_message = error_message + self.timer = None + self.timeout_occurred = False + + def handle_timeout(self, signum=None, frame=None): + self.timeout_occurred = True + + def __enter__(self): + # signal.signal(signal.SIGALRM, self.handle_timeout) + # signal.alarm(self.seconds) + self.timer = threading.Timer(self.seconds, self.handle_timeout) + self.timer.start() + return self + + def __exit__(self, type, value, traceback): + self.timer.cancel() + if self.timeout_occurred: + print(self.error_message) # 处理超时的逻辑 + + +def get_background_pids(container_obj): + pids = container_obj.exec_run("ps -eo pid,comm --no-headers").output.decode().split("\n") + pids = [x.split() for x in pids if x] + pids = [x for x in pids if x[1] not in {"ps"} and x[0] != "1"] + bash_pids = [x for x in pids if x[1] == "bash"] + other_pids = [x for x in pids if x[1] not in {"bash"}] + return bash_pids, other_pids + + +def _get_non_persistent_container(ctr_name: str, image_name: str) -> Tuple[subprocess.Popen, set]: + startup_cmd = [ + "docker", + "run", + "-i", + "--rm", + "--name", + ctr_name, + image_name, + "/bin/bash", + "-l", + "-m", + ] + + logger.debug("Starting container with command: %s", shlex.join(startup_cmd)) + container = subprocess.Popen( + startup_cmd, + stdin=PIPE, + stdout=PIPE, + stderr=STDOUT, + text=True, + bufsize=1, # line buffered + ) + time.sleep(START_UP_DELAY) + # try to read output from container setup (usually an error), timeout if no output + try: + with timeout(seconds=2): + output = container.stdout.read() + if output: + logger.error(f"Unexpected container setup output: {output}") + except TimeoutError: + pass + return container, { + "1", + } # bash PID is always 1 for non-persistent containers + + +def _get_persistent_container(ctr_name: str, image_name: str, persistent: bool = False) -> Tuple[subprocess.Popen, set]: + client = docker.from_env() + containers = client.containers.list(all=True, filters={"name": ctr_name}) + if ctr_name in [c.name for c in containers]: + container_obj = client.containers.get(ctr_name) + if container_obj.status in {"created"}: + container_obj.start() + elif container_obj.status in {"running"}: + pass + elif container_obj.status in {"exited"}: + container_obj.restart() + elif container_obj.status in {"paused"}: + container_obj.unpause() + else: + raise RuntimeError(f"Unexpected container status: {container_obj.status}") + else: + container_obj = client.containers.run( + image_name, + command="/bin/bash -l -m", + name=ctr_name, + stdin_open=True, + tty=True, + detach=True, + auto_remove=not persistent, + ) + container_obj.start() + startup_cmd = [ + "docker", + "exec", + "-i", + ctr_name, + "/bin/bash", + "-l", + "-m", + ] + logger.debug("Starting container with command: %s", shlex.join(startup_cmd)) + container = subprocess.Popen( + startup_cmd, + stdin=PIPE, + stdout=PIPE, + stderr=STDOUT, + text=True, + bufsize=1, # line buffered + ) + time.sleep(START_UP_DELAY) + # try to read output from container setup (usually an error), timeout if no output + try: + with timeout(seconds=2): + output = container.stdout.read() + if output: + logger.error(f"Unexpected container setup output: {output}") + except TimeoutError: + pass + # Get the process IDs of the container + # There should be at least a head process and possibly one child bash process + bash_pids, other_pids = get_background_pids(container_obj) + bash_pid = 1 + if len(bash_pids) == 1: + bash_pid = bash_pids[0][0] + elif len(bash_pids) > 1 or len(other_pids) > 0: + raise RuntimeError( + f"Detected alien processes attached or running. Please ensure that no other agents are running on this container. PIDs: {bash_pids}, {other_pids}" + ) + return container, set( + map( + str, + [ + bash_pid, + 1, + ], + ) + ) + + +def get_container(ctr_name: str, image_name: str, persistent: bool = False) -> subprocess.Popen: + """ + Get a container object for a given container name and image name + + Arguments: + ctr_name (str): Name of container + image_name (str): Name of image + persistent (bool): Whether to use a persistent container or not + Returns: + Container object + """ + if persistent: + return _get_persistent_container(ctr_name, image_name) + else: + return _get_non_persistent_container(ctr_name, image_name) + + +def get_commit(api: GhApi, owner: str, repo: str, base_commit: str = None): + if base_commit: + commit = api.repos.get_commit(owner, repo, base_commit) + else: + commit = api.repos.list_commits(owner, repo)[0] + return commit + + +class InvalidGithubURL(ValueError): + ... + + +def parse_gh_issue_url(issue_url: str) -> Tuple[str, str, str]: + """Return owner, repo, issue number from issue url""" + match = GITHUB_ISSUE_URL_PATTERN.search(issue_url) + if not match: + raise InvalidGithubURL(f"Invalid GitHub issue URL: {issue_url}") + res = match.groups() + assert len(res) == 3 + return tuple(res) # type: ignore + + +def get_instances(file_path: str, base_commit: str = None, split: str = None, token: str = None): + """ + Getter function for handling json, jsonl files + + Arguments: + file_path (str): Path to file + Returns: + List of instances + """ + # If file_path is a directory, attempt load from disk + if os.path.isdir(file_path): + dataset_or_dict = load_from_disk(file_path) + if isinstance(dataset_or_dict, dict): + return dataset_or_dict[split] + return dataset_or_dict + + # If file_path is a github issue url, fetch the issue and return a single instance + if is_from_github_url(file_path): + try: + owner, repo, issue_number = parse_gh_issue_url(file_path) + except InvalidGithubURL: + pass + else: + record = dict() + api = GhApi(token=token) + issue = api.issues.get(owner, repo, issue_number) + title = issue.title if issue.title else "" + body = issue.body if issue.body else "" + text = f"{title}\n{body}\n" + record["repo"] = f"{owner}/{repo}" + record["base_commit"] = base_commit if base_commit else get_commit(api, owner, repo, base_commit).sha + record["version"] = record["base_commit"][:7] + record["problem_statement"] = text + record["instance_id"] = f"{owner}__{repo}-i{issue_number}" + return [ + record, + ] + elif base_commit is not None: + raise ValueError("base_commit must be None if data_path is not a github issue url") + + # If file_path is a file, load the file + if file_path.endswith(".json"): + return json.load(open(file_path)) + if file_path.endswith(".jsonl"): + return [json.loads(x) for x in open(file_path, "r").readlines()] + + # Attempt load from HF datasets as a last resort + try: + return load_dataset(file_path, split=split) + except: + raise ValueError( + f"Could not load instances from {file_path}. " + "Please ensure --data_path is a GitHub URL, a SWE-bench HuggingFace dataset, or a JSON/JSONL file." + ) diff --git a/metagpt/tools/swe_agent_commands/search.sh b/metagpt/tools/swe_agent_commands/search.sh new file mode 100644 index 000000000..b973b2d12 --- /dev/null +++ b/metagpt/tools/swe_agent_commands/search.sh @@ -0,0 +1,245 @@ +# @yaml +# signature: search_dir_and_preview [] +# docstring: searches for search_term in all files in dir and give their code preview with line number if you think need a first look. The output will vary depending on the length of the search results, but the file path, line number & corresponding code or number of occurrences will always be output. If dir is not provided, searches in the current directory +# arguments: +# search_term: +# type: string +# description: the term to search for +# required: true +# dir: +# type: string +# description: the directory to search in (if not provided, searches in the current directory) +# required: false +search_dir_and_preview() { + if [ $# -eq 1 ]; then + local search_term="$1" + local dir="./" + elif [ $# -eq 2 ]; then + local search_term="$1" + if [ -d "$2" ]; then + local dir="$2" + else + echo "Directory $2 not found" + return + fi + else + echo "Usage: search_dir_and_preview []" + return + fi + dir=$(realpath "$dir") + local matches=$(find "$dir" -type f -path '*.py' -exec grep -nIH -- "$search_term" {} + | cut -d: -f1 | sort | uniq -c) +< 100, print an error + if [ $num_files -gt 100 ]; then + echo "More than $num_files files matched for \"$search_term\" in $dir. Please narrow your search." + return + fi + + match_with_cnt=$(echo "$matches" | awk '{$2=$2; gsub(/^\.+\/+/, "./", $2); print $2 " ("$1" matches)"}') +< [] +# docstring: searches for search_term in file. If file is not provided, searches in the current open file +# arguments: +# search_term: +# type: string +# description: the term to search for +# required: true +# file: +# type: string +# description: the file to search in (if not provided, searches in the current open file) +# required: false +search_file() { + # Check if the first argument is provided + if [ -z "$1" ]; then + echo "Usage: search_file []" + return + fi + # Check if the second argument is provided + if [ -n "$2" ]; then + # Check if the provided argument is a valid file + if [ -f "$2" ]; then + local file="$2" # Set file if valid + else + echo "Usage: search_file []" + echo "Error: File name $2 not found. Please provide a valid file name." + return # Exit if the file is not valid + fi + else + # Check if a file is open + if [ -z "$CURRENT_FILE" ]; then + echo "No file open. Use the open command first." + return # Exit if no file is open + fi + local file="$CURRENT_FILE" # Set file to the current open file + fi + local search_term="$1" + file=$(realpath "$file") + # Use grep to directly get the desired formatted output + local matches=$(grep -nH -- "$search_term" "$file") + # Check if no matches were found + if [ -z "$matches" ]; then + echo "No matches found for \"$search_term\" in $file" + return + fi + # Calculate total number of matches + local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}') + + # calculate total number of lines matched + local num_lines=$(echo "$matches" | cut -d: -f1 | sort | uniq | wc -l | awk '{$1=$1; print $0}') + # if num_lines is > 100, print an error + if [ $num_lines -gt 100 ]; then + echo "More than $num_lines lines matched for \"$search_term\" in $file. Please narrow your search." + return + fi + + # Print the total number of matches and the matches themselves + echo "Found $num_matches matches for \"$search_term\" in $file:" + echo "$matches" | cut -d: -f1-2 | sort -u -t: -k2,2n | while IFS=: read -r filename line_number; do + echo "Line $line_number:$(sed -n "${line_number}p" "$file")" + done + echo "End of matches for \"$search_term\" in $file" +} + +# @yaml +# signature: find_file [] +# docstring: finds all files with the given name in dir. If dir is not provided, searches in the current directory +# arguments: +# file_name: +# type: string +# description: the name of the file to search for +# required: true +# dir: +# type: string +# description: the directory to search in (if not provided, searches in the current directory) +# required: false +find_file() { + if [ $# -eq 1 ]; then + local file_name="$1" + local dir="./" + elif [ $# -eq 2 ]; then + local file_name="$1" + if [ -d "$2" ]; then + local dir="$2" + else + echo "Directory $2 not found" + return + fi + else + echo "Usage: find_file []" + return + fi + + dir=$(realpath "$dir") + local matches=$(find "$dir" -type f -name "$file_name") + # if no matches, return + if [ -z "$matches" ]; then + echo "No matches found for \"$file_name\" in $dir" + return + fi + # Calculate total number of matches + local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}') + echo "Found $num_matches matches for \"$file_name\" in $dir:" + echo "$matches" | awk '{print $0}' +} diff --git a/metagpt/tools/swe_agent_commands/setup_default.sh b/metagpt/tools/swe_agent_commands/setup_default.sh new file mode 100644 index 000000000..dc3b335df --- /dev/null +++ b/metagpt/tools/swe_agent_commands/setup_default.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +pip install flake8 + +# Default Mode from SWE-Bench +# https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml#L103-L106 +SCRIPT_PATH="${BASH_SOURCE[0]}" # use BASH_SOURCE to avoid the influence of `source *.sh which cause CUR_DIR=/bin` +CUR_DIR=$(dirname $(readlink -f $SCRIPT_PATH)) +REPO_ROOT_DIR=$CUR_DIR"/../../.." +source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/_setup_default_env.sh + +# make _split_string (py) available +export PATH=$PATH:$REPO_ROOT_DIR/metagpt/tools/swe_agent_commands + +source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/defaults.sh +source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/search.sh +source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/edit_linting.sh + +export SWE_CMD_WORK_DIR="$REPO_ROOT_DIR/workspace/swe_agent_workdir" +#sudo chmod 777 $REPO_ROOT_DIR/workspace/swe_agent_workdir diff --git a/metagpt/tools/swe_agent_commands/swe_agent_utils.py b/metagpt/tools/swe_agent_commands/swe_agent_utils.py new file mode 100644 index 000000000..8c01dc9c9 --- /dev/null +++ b/metagpt/tools/swe_agent_commands/swe_agent_utils.py @@ -0,0 +1,36 @@ +from pathlib import Path + +import numpy as np +from datasets import load_dataset, load_from_disk + + +def extract_patch(command_output): + patch_lines = [] + recording = False + for line in command_output.split("\n"): + if line.startswith("diff --git"): + recording = True + if recording: + patch_lines.append(line) + return "\n".join(patch_lines) + + +def load_hf_dataset(dataset_name_or_path: str, cache_dir, split: str = "test", existing_ids: list = []): + if Path(dataset_name_or_path).exists(): + dataset = load_from_disk(dataset_name_or_path) + else: + dataset = load_dataset(dataset_name_or_path, cache_dir=cache_dir) + print(dataset) + if split not in dataset: + raise ValueError(f"Invalid split {split} for dataset {dataset_name_or_path}") + dataset = dataset[split] + np.array(list(map(len, dataset["instance_id"]))) + + if existing_ids: + dataset = dataset.filter( + lambda x: x["instance_id"] not in existing_ids, + desc="Filtering out existing ids", + load_from_cache_file=False, + ) + + return dataset diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index 0ba2daa89..63e2f8736 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -32,6 +32,8 @@ TOKEN_COSTS = { "gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03}, "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03}, + "gpt-4o": {"prompt": 0.005, "completion": 0.015}, + "gpt-4o-2024-05-13": {"prompt": 0.005, "completion": 0.015}, "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens "glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens @@ -207,6 +209,8 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0125"): "gpt-4-1106-preview", "gpt-4-vision-preview", "gpt-4-1106-vision-preview", + "gpt-4o-2024-05-13", + "gpt-4o", }: tokens_per_message = 3 # # every reply is primed with <|start|>assistant<|message|> tokens_per_name = 1 diff --git a/requirements.txt b/requirements.txt index 83a904156..23806eb63 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ PyYAML==6.0.1 # sentence_transformers==2.2.2 setuptools==65.6.3 tenacity==8.2.3 -tiktoken==0.6.0 +tiktoken==0.7.0 tqdm==4.66.2 #unstructured[local-inference] # selenium>4 diff --git a/tests/metagpt/roles/di/run_swe.py b/tests/metagpt/roles/di/run_swe.py new file mode 100644 index 000000000..1e9e31ed6 --- /dev/null +++ b/tests/metagpt/roles/di/run_swe.py @@ -0,0 +1,136 @@ +import asyncio +import json +from datetime import datetime +from pathlib import Path + +from metagpt.config2 import config +from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT +from metagpt.logs import logger +from metagpt.roles.di.swe import SWE +from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset + +# Specify by yourself +TEST_REPO_DIR = Path("/Users/seeker/Projects/sdfz/mg/mg-swe-agent") / "benchmark" / "swe_bench" / "data" / "test_repo" +DATA_DIR = METAGPT_ROOT / "benchmark" / "swe_bench" / "data" + +INSTANCE_TEMPLATE = """ +## User Requirement +Fix the bug in the repo. Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed. + +We're currently solving the following issue within our repository. You can use any bash commands or the special interface to help you. Here's the issue and hints text: +## ISSUE +{issue} + +## HINTS +hints text is the comment under issue: +{hints_text} + +The repo may already exist at the path `{repo_path}` (if not, please download the repo to this path). +This issue occurred in version {version}, with the corresponding base commit being {base_commit}, you need to switch to the code version corresponding to this commit. + +# INSTRUCTIONS: +Now, you're going to solve this issue on your own from the perspective of a programmer. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need. +Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. +""" + + +def split_dataset_equally(dataset): + # 计算索引 + # fixme: 设置django + + part1 = dataset.filter( + lambda x: x["repo"] + not in [ + "django/django", + "sympy/sympy", + "pytest-dev/pytest", + ], + desc="Filtering out existing ids", + load_from_cache_file=True, + ) + + part2 = dataset.filter( + lambda x: x["repo"] in ["sympy/sympy", "pytest-dev/pytest"], + desc="Filtering out existing ids", + load_from_cache_file=True, + ) + + part3 = dataset.filter( + lambda x: x["repo"] in ["django/django"], + desc="Filtering out existing ids", + load_from_cache_file=False, + ) + + print(len(part1), len(part2), len(part3)) + + return [part1, part2, part3] + + +def check_instance_status(instance, swe_result_dir): + output_file = swe_result_dir / "all_preds.jsonl" + res = True + # 先检查all_preds.jsonl文件是否存在 + if not output_file.exists(): + return res + with open(output_file, "r") as fp: + for line in fp: + existing_instance = json.loads(line.strip()) + if existing_instance["instance_id"] == instance["instance_id"]: + return False + return True + + +async def run(instance, swe_result_dir): + if not check_instance_status(instance, swe_result_dir): + logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.") + return + + repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]) + + user_requirement_and_issue = INSTANCE_TEMPLATE.format( + issue=instance["problem_statement"], + hints_text=instance["hints_text"], + repo_path=repo_path, + version=instance["version"], + base_commit=instance["base_commit"], + ) + + logger.info(f"**** Starting to run {instance['instance_id']}****") + swe_agent = SWE() + await swe_agent.run(user_requirement_and_issue) + save_predictions(swe_agent, instance, swe_result_dir) + logger.info(f"**** Finished running {instance['instance_id']}****") + + +def save_predictions(swe_agent: SWE, instance, swe_result_dir): + output_file = swe_result_dir / "all_preds.jsonl" + instance["model_name_or_path"] = swe_agent.config.llm.model + instance["model_patch"] = swe_agent.output_diff + + logger.info(f"Preparing to save predictions to {output_file}") + + # Save the predictions to a JSONL file + with open(output_file, "a+") as fp: + print(json.dumps(instance), file=fp, flush=True) + + logger.info(f"Saved prediction of {instance['instance_id']} to {output_file}") + + +async def async_main(): + dataset_path = "manna-ai/SWE-bench_Nano" # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano" + + dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test") + sample_datasets = split_dataset_equally(dataset) + date_time = datetime.now().strftime("%m-%d") + round_ = "first" + + for idx, sub_dataset in enumerate(sample_datasets): + exp_name = f"Nano-test-{date_time}-{round_}-part-{idx}" + swe_result_dir = DEFAULT_WORKSPACE_ROOT / "swe_agent_workdir" / f"result_{config.llm.model}" / exp_name + swe_result_dir.mkdir(parents=True, exist_ok=True) + for instance in sub_dataset: + await run(instance, swe_result_dir) + + +if __name__ == "__main__": + asyncio.run(async_main()) From 92f94862cbdb2019fccf7990e6842d3a3066b9ab Mon Sep 17 00:00:00 2001 From: seeker Date: Tue, 2 Jul 2024 21:26:21 +0800 Subject: [PATCH 02/10] update: swe --- metagpt/prompts/di/swe.py | 16 +- metagpt/roles/di/role_zero.py | 7 +- metagpt/roles/di/swe.py | 23 +- metagpt/tools/swe_agent_commands/__init__.py | 7 + metagpt/tools/swe_agent_commands/defaults.sh | 2 +- .../swe_agent_commands/execute_env_utils.py | 359 ------------------ tests/metagpt/roles/di/run_swe.py | 16 +- 7 files changed, 57 insertions(+), 373 deletions(-) create mode 100644 metagpt/tools/swe_agent_commands/__init__.py delete mode 100644 metagpt/tools/swe_agent_commands/execute_env_utils.py diff --git a/metagpt/prompts/di/swe.py b/metagpt/prompts/di/swe.py index 15e917e11..64c67b09b 100644 --- a/metagpt/prompts/di/swe.py +++ b/metagpt/prompts/di/swe.py @@ -1,3 +1,10 @@ +""" +This code is adapted from the examples provided in the SWE-agent project. +You can find the original examples from the SWE-agent project here: +https://github.com/princeton-nlp/SWE-agent/tree/main/config/configs +""" + + SWE_AGENT_SYSTEM_TEMPLATE = """ SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface. @@ -8,9 +15,13 @@ If you'd like to add the line ' print(x)' you must fully write that out, Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them. -Your output should always contain a section of reasoning and a command described in JSON format. The command must always contain command_name and args fields. The command_name field should always be Bash.run, and the args field should always include a cmd field containing the bash command, as shown in the example below: - +Your output should always contain a section of reasoning and a command described in JSON format. +The command must always contain command_name and args fields. The command_name field should always be Bash.run, and the args field should always include a cmd field containing the bash command. +Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes. +As shown in the example below: + First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. + ```json {{ "command_name": "Bash.run", @@ -18,7 +29,6 @@ First I'll start by using ls to see what files are in the current directory. The "cmd": "ls -a" }} }} - ``` diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 53d7393d6..c6ed6f6c6 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -4,7 +4,7 @@ import inspect import json import re import traceback -from typing import Callable, Dict, List, Literal, Tuple, Union +from typing import Callable, Dict, List, Literal, Tuple from pydantic import model_validator @@ -167,7 +167,7 @@ class RoleZero(Role): if self.use_fixed_sop: return await super()._act() - commands, ok = await self._get_commands() + commands, ok = await self._parse_commands() if not ok: error_msg = commands return error_msg @@ -202,7 +202,7 @@ class RoleZero(Role): actions_taken += 1 return rsp # return output from the last action - async def _get_commands(self) -> Tuple[Union[UserMessage, List[Dict]], bool]: + async def _parse_commands(self) -> Tuple[List[Dict], bool]: """Retrieves commands from the Large Language Model (LLM). This function attempts to retrieve a list of commands from the LLM by @@ -211,7 +211,6 @@ class RoleZero(Role): Returns: A tuple containing: - - A `UserMessage` object or dict representing the commands. - A boolean flag indicating success (True) or failure (False). """ try: diff --git a/metagpt/roles/di/swe.py b/metagpt/roles/di/swe.py index 1fd1324cf..6d357c02b 100644 --- a/metagpt/roles/di/swe.py +++ b/metagpt/roles/di/swe.py @@ -22,6 +22,7 @@ class SWE(RoleZero): _system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE system_msg: list[str] = [_system_msg.format(WINDOW=_bash_window_size)] _instruction: str = NEXT_STEP_TEMPLATE + # tools: list[str] = ["Bash", "Browser"] tools: list[str] = ["Bash"] terminal: Bash = Field(default_factory=Bash, exclude=True) output_diff: str = "" @@ -35,11 +36,23 @@ class SWE(RoleZero): return res def _set_system_msg(self): + """ + Sets the system message for the SWE agent. + + Sets the `_bash_window_size` from the environment variable `WINDOW` if it exists. + Formats the `_system_msg` template with the current `_bash_window_size`. + """ if os.getenv("WINDOW"): self._bash_window_size = int(os.getenv("WINDOW")) self.system_msg = [self._system_msg.format(WINDOW=self._bash_window_size)] def _format_instruction(self): + """ + Formats the instruction message for the SWE agent. + + Runs the "state" command in the terminal, parses its output as JSON, + and uses it to format the `_instruction` template. + """ state_output = self.terminal.run("state") bash_state = json.loads(state_output) @@ -50,7 +63,15 @@ class SWE(RoleZero): return self.instruction async def _handle_action(self): - commands, ok = await self._get_commands() + """ + Handles actions based on parsed commands. + + Parses commands, checks for a "submit" action, and generates a patch using `git diff`. + Stores the cleaned patch in `output_diff`. Logs any exceptions. + + This function is specifically added for SWE bench evaluation. + """ + commands, ok = await self._parse_commands() if not ok: return for cmd in commands: diff --git a/metagpt/tools/swe_agent_commands/__init__.py b/metagpt/tools/swe_agent_commands/__init__.py new file mode 100644 index 000000000..42e92a12d --- /dev/null +++ b/metagpt/tools/swe_agent_commands/__init__.py @@ -0,0 +1,7 @@ +""" +This tool is originally developed by the team behind the princeton-nlp/SWE-agent repository. +You can find the original repository here: +https://github.com/princeton-nlp/SWE-agent/tree/main/config/commands +We are using a modified version from OpenDevin: +https://github.com/OpenDevin/OpenDevin/tree/main/opendevin/runtime/plugins/swe_agent_commands +""" diff --git a/metagpt/tools/swe_agent_commands/defaults.sh b/metagpt/tools/swe_agent_commands/defaults.sh index 880920711..f0898aabc 100644 --- a/metagpt/tools/swe_agent_commands/defaults.sh +++ b/metagpt/tools/swe_agent_commands/defaults.sh @@ -177,7 +177,7 @@ create() { # @yaml # signature: submit -# docstring: submits your current code and terminates the session +# docstring: submits your current code and terminates the session. this is the only submit action needed; no need to run git add or git commit before this. submit() { # Check if the patch file exists and is non-empty if [ -s "$SWE_CMD_WORK_DIR/test.patch" ]; then diff --git a/metagpt/tools/swe_agent_commands/execute_env_utils.py b/metagpt/tools/swe_agent_commands/execute_env_utils.py deleted file mode 100644 index 8fbc616fa..000000000 --- a/metagpt/tools/swe_agent_commands/execute_env_utils.py +++ /dev/null @@ -1,359 +0,0 @@ -import json -import os -import re -import select -import shlex -import subprocess -import tarfile -import tempfile -import threading -import time -import traceback -from io import BytesIO -from subprocess import PIPE, STDOUT -from typing import Tuple - -import docker -from datasets import load_dataset, load_from_disk -from ghapi.all import GhApi - -from metagpt.logs import logger - -LOGGER_NAME = "intercode" -START_UP_DELAY = 5 -TIMEOUT_DURATION = 25 -GITHUB_ISSUE_URL_PATTERN = re.compile(r"github\.com\/(.*?)\/(.*?)\/issues\/(\d+)") - - -def is_from_github_url(data_path: str): - return GITHUB_ISSUE_URL_PATTERN.search(data_path) is not None - - -def copy_file_to_container(container, contents, container_path): - """ - Copies a given string into a Docker container at a specified path. - - Args: - - container: Docker SDK container object. - - contents: The string to copy into the container. - - container_path: The path inside the container where the string should be copied to. - - Returns: - - None - """ - temp_file_name = None - - try: - # Create a temporary file - with tempfile.NamedTemporaryFile(delete=False) as temp_file: - temp_file_name = temp_file.name - # Write the string to the temporary file and ensure it's written to disk - temp_file.write(contents.encode("utf-8")) - temp_file.flush() - os.fsync(temp_file.fileno()) - - # Create a TAR archive in memory containing the temporary file - with tempfile.NamedTemporaryFile(): - with open(temp_file_name, "rb") as temp_file: - # Prepare the TAR archive - with BytesIO() as tar_stream: - with tarfile.open(fileobj=tar_stream, mode="w") as tar: - tar_info = tarfile.TarInfo(name=os.path.basename(container_path)) - tar_info.size = os.path.getsize(temp_file_name) - tar.addfile(tarinfo=tar_info, fileobj=temp_file) - tar_stream.seek(0) - # Copy the TAR stream to the container - container.put_archive(path=os.path.dirname(container_path), data=tar_stream.read()) - - except Exception as e: - logger.error(f"An error occurred: {e}") - logger.error(traceback.format_exc()) - finally: - # Cleanup: Remove the temporary file if it was created - if temp_file_name and os.path.exists(temp_file_name): - os.remove(temp_file_name) - - -def read_with_timeout(container, pid_func, timeout_duration): - """ - Read data from a subprocess with a timeout. - This function uses a file descriptor to read data from the subprocess in a non-blocking way. - - Args: - container (subprocess.Popen): The subprocess container. - pid_func (function): A function that returns a list of process IDs (except the PID of the main process). - timeout_duration (int): The timeout duration in seconds. - - Returns: - str: The data read from the subprocess, stripped of trailing newline characters. - - Raises: - TimeoutError: If the timeout duration is reached while reading from the subprocess. - """ - buffer = b"" - fd = container.stdout.fileno() - end_time = time.time() + timeout_duration - - while time.time() < end_time: - pids = pid_func() - if len(pids) > 0: - # There are still PIDs running - time.sleep(0.05) - continue - ready_to_read, _, _ = select.select([fd], [], [], 0.1) - if ready_to_read: - data = os.read(fd, 4096) - if data: - buffer += data - else: - # No more data to read - break - time.sleep(0.05) # Prevents CPU hogging - - if container.poll() is not None: - raise RuntimeError("Subprocess exited unexpectedly.\nCurrent buffer: {}".format(buffer.decode())) - if time.time() >= end_time: - raise TimeoutError( - "Timeout reached while reading from subprocess.\nCurrent buffer: {}\nRunning PIDs: {}".format( - buffer.decode(), pids - ) - ) - return buffer.decode() - - -class timeout: - def __init__(self, seconds=TIMEOUT_DURATION, error_message="Timeout"): - self.seconds = seconds - self.error_message = error_message - self.timer = None - self.timeout_occurred = False - - def handle_timeout(self, signum=None, frame=None): - self.timeout_occurred = True - - def __enter__(self): - # signal.signal(signal.SIGALRM, self.handle_timeout) - # signal.alarm(self.seconds) - self.timer = threading.Timer(self.seconds, self.handle_timeout) - self.timer.start() - return self - - def __exit__(self, type, value, traceback): - self.timer.cancel() - if self.timeout_occurred: - print(self.error_message) # 处理超时的逻辑 - - -def get_background_pids(container_obj): - pids = container_obj.exec_run("ps -eo pid,comm --no-headers").output.decode().split("\n") - pids = [x.split() for x in pids if x] - pids = [x for x in pids if x[1] not in {"ps"} and x[0] != "1"] - bash_pids = [x for x in pids if x[1] == "bash"] - other_pids = [x for x in pids if x[1] not in {"bash"}] - return bash_pids, other_pids - - -def _get_non_persistent_container(ctr_name: str, image_name: str) -> Tuple[subprocess.Popen, set]: - startup_cmd = [ - "docker", - "run", - "-i", - "--rm", - "--name", - ctr_name, - image_name, - "/bin/bash", - "-l", - "-m", - ] - - logger.debug("Starting container with command: %s", shlex.join(startup_cmd)) - container = subprocess.Popen( - startup_cmd, - stdin=PIPE, - stdout=PIPE, - stderr=STDOUT, - text=True, - bufsize=1, # line buffered - ) - time.sleep(START_UP_DELAY) - # try to read output from container setup (usually an error), timeout if no output - try: - with timeout(seconds=2): - output = container.stdout.read() - if output: - logger.error(f"Unexpected container setup output: {output}") - except TimeoutError: - pass - return container, { - "1", - } # bash PID is always 1 for non-persistent containers - - -def _get_persistent_container(ctr_name: str, image_name: str, persistent: bool = False) -> Tuple[subprocess.Popen, set]: - client = docker.from_env() - containers = client.containers.list(all=True, filters={"name": ctr_name}) - if ctr_name in [c.name for c in containers]: - container_obj = client.containers.get(ctr_name) - if container_obj.status in {"created"}: - container_obj.start() - elif container_obj.status in {"running"}: - pass - elif container_obj.status in {"exited"}: - container_obj.restart() - elif container_obj.status in {"paused"}: - container_obj.unpause() - else: - raise RuntimeError(f"Unexpected container status: {container_obj.status}") - else: - container_obj = client.containers.run( - image_name, - command="/bin/bash -l -m", - name=ctr_name, - stdin_open=True, - tty=True, - detach=True, - auto_remove=not persistent, - ) - container_obj.start() - startup_cmd = [ - "docker", - "exec", - "-i", - ctr_name, - "/bin/bash", - "-l", - "-m", - ] - logger.debug("Starting container with command: %s", shlex.join(startup_cmd)) - container = subprocess.Popen( - startup_cmd, - stdin=PIPE, - stdout=PIPE, - stderr=STDOUT, - text=True, - bufsize=1, # line buffered - ) - time.sleep(START_UP_DELAY) - # try to read output from container setup (usually an error), timeout if no output - try: - with timeout(seconds=2): - output = container.stdout.read() - if output: - logger.error(f"Unexpected container setup output: {output}") - except TimeoutError: - pass - # Get the process IDs of the container - # There should be at least a head process and possibly one child bash process - bash_pids, other_pids = get_background_pids(container_obj) - bash_pid = 1 - if len(bash_pids) == 1: - bash_pid = bash_pids[0][0] - elif len(bash_pids) > 1 or len(other_pids) > 0: - raise RuntimeError( - f"Detected alien processes attached or running. Please ensure that no other agents are running on this container. PIDs: {bash_pids}, {other_pids}" - ) - return container, set( - map( - str, - [ - bash_pid, - 1, - ], - ) - ) - - -def get_container(ctr_name: str, image_name: str, persistent: bool = False) -> subprocess.Popen: - """ - Get a container object for a given container name and image name - - Arguments: - ctr_name (str): Name of container - image_name (str): Name of image - persistent (bool): Whether to use a persistent container or not - Returns: - Container object - """ - if persistent: - return _get_persistent_container(ctr_name, image_name) - else: - return _get_non_persistent_container(ctr_name, image_name) - - -def get_commit(api: GhApi, owner: str, repo: str, base_commit: str = None): - if base_commit: - commit = api.repos.get_commit(owner, repo, base_commit) - else: - commit = api.repos.list_commits(owner, repo)[0] - return commit - - -class InvalidGithubURL(ValueError): - ... - - -def parse_gh_issue_url(issue_url: str) -> Tuple[str, str, str]: - """Return owner, repo, issue number from issue url""" - match = GITHUB_ISSUE_URL_PATTERN.search(issue_url) - if not match: - raise InvalidGithubURL(f"Invalid GitHub issue URL: {issue_url}") - res = match.groups() - assert len(res) == 3 - return tuple(res) # type: ignore - - -def get_instances(file_path: str, base_commit: str = None, split: str = None, token: str = None): - """ - Getter function for handling json, jsonl files - - Arguments: - file_path (str): Path to file - Returns: - List of instances - """ - # If file_path is a directory, attempt load from disk - if os.path.isdir(file_path): - dataset_or_dict = load_from_disk(file_path) - if isinstance(dataset_or_dict, dict): - return dataset_or_dict[split] - return dataset_or_dict - - # If file_path is a github issue url, fetch the issue and return a single instance - if is_from_github_url(file_path): - try: - owner, repo, issue_number = parse_gh_issue_url(file_path) - except InvalidGithubURL: - pass - else: - record = dict() - api = GhApi(token=token) - issue = api.issues.get(owner, repo, issue_number) - title = issue.title if issue.title else "" - body = issue.body if issue.body else "" - text = f"{title}\n{body}\n" - record["repo"] = f"{owner}/{repo}" - record["base_commit"] = base_commit if base_commit else get_commit(api, owner, repo, base_commit).sha - record["version"] = record["base_commit"][:7] - record["problem_statement"] = text - record["instance_id"] = f"{owner}__{repo}-i{issue_number}" - return [ - record, - ] - elif base_commit is not None: - raise ValueError("base_commit must be None if data_path is not a github issue url") - - # If file_path is a file, load the file - if file_path.endswith(".json"): - return json.load(open(file_path)) - if file_path.endswith(".jsonl"): - return [json.loads(x) for x in open(file_path, "r").readlines()] - - # Attempt load from HF datasets as a last resort - try: - return load_dataset(file_path, split=split) - except: - raise ValueError( - f"Could not load instances from {file_path}. " - "Please ensure --data_path is a GitHub URL, a SWE-bench HuggingFace dataset, or a JSON/JSONL file." - ) diff --git a/tests/metagpt/roles/di/run_swe.py b/tests/metagpt/roles/di/run_swe.py index 42694020f..c6cc56fd1 100644 --- a/tests/metagpt/roles/di/run_swe.py +++ b/tests/metagpt/roles/di/run_swe.py @@ -7,6 +7,7 @@ from metagpt.config2 import config from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT from metagpt.logs import logger from metagpt.roles.di.swe import SWE +from metagpt.tools.libs.terminal import Terminal from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset # Specify by yourself @@ -25,8 +26,9 @@ We're currently solving the following issue within our repository. You can use a hints text is the comment under issue: {hints_text} -The repo may already exist at the path `{repo_path}` (if not, please download the repo to this path). -This issue occurred in version {version}, with the corresponding base commit being {base_commit}, you need to switch to the code version corresponding to this commit. +The repository may already exist at the path `{repo_path}`. If it doesn't, please download the repository to this path. +All your subsequent actions should use the project path as your root directory, and you should never leave that directory to execute any actions. +This issue occurred in version {version}, with the corresponding base commit being {base_commit}. You need to switch to the code version associated with this commit. # INSTRUCTIONS: Now, you're going to solve this issue on your own from the perspective of a programmer. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need. @@ -86,7 +88,11 @@ async def run(instance, swe_result_dir): return repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]) - + """ + All your subsequent actions should use the project path as your root directory, and you should never leave that directory to execute any actions. + """ + terminal = Terminal() + terminal.run_command(f"cd {repo_path} && git checkout . && git clean -n -d && git clean -f -d") user_requirement_and_issue = INSTANCE_TEMPLATE.format( issue=instance["problem_statement"], hints_text=instance["hints_text"], @@ -122,10 +128,10 @@ async def async_main(): dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test") sample_datasets = split_dataset_equally(dataset) date_time = datetime.now().strftime("%m-%d") - round_ = "first" + round_ = "third" for idx, sub_dataset in enumerate(sample_datasets): - exp_name = f"Nano-test-{date_time}-{round_}-part-{idx}" + exp_name = f"nano_mgx_{date_time}_{round_}_part_{idx}" swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model}" / exp_name swe_result_dir.mkdir(parents=True, exist_ok=True) for instance in sub_dataset: From 45b35525105dd5d3000034ed4ff6423f29bec1bb Mon Sep 17 00:00:00 2001 From: your-username Date: Wed, 3 Jul 2024 20:11:32 +0800 Subject: [PATCH 03/10] update: SWE Agent --- metagpt/const.py | 2 +- metagpt/prompts/di/swe.py | 87 +++++++++++++------ metagpt/roles/di/swe.py | 8 +- metagpt/tools/libs/browser.py | 2 + metagpt/tools/libs/terminal.py | 7 +- metagpt/tools/swe_agent_commands/defaults.sh | 9 +- .../swe_agent_commands/swe_agent_utils.py | 8 +- tests/metagpt/roles/di/run_swe.py | 71 +++++---------- 8 files changed, 103 insertions(+), 91 deletions(-) diff --git a/metagpt/const.py b/metagpt/const.py index c78a22641..94d22bc70 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -149,6 +149,6 @@ METAGPT_REPORTER_DEFAULT_URL = os.environ.get("METAGPT_REPORTER_URL", "") # Metadata defines AGENT = "agent" - +SWE_WORKSPACE_ROOT = Path("/tmp/swe_workspace") # SWE agent SWE_SETUP_PATH = METAGPT_ROOT / "metagpt/tools/swe_agent_commands/setup_default.sh" diff --git a/metagpt/prompts/di/swe.py b/metagpt/prompts/di/swe.py index 64c67b09b..ed1f8a011 100644 --- a/metagpt/prompts/di/swe.py +++ b/metagpt/prompts/di/swe.py @@ -4,19 +4,17 @@ You can find the original examples from the SWE-agent project here: https://github.com/princeton-nlp/SWE-agent/tree/main/config/configs """ - SWE_AGENT_SYSTEM_TEMPLATE = """ -SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface. +SETTING: You are an autonomous programmer, and you're working directly in the environment line with a special interface. The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time. Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. Pay attention to the original indentation when replacing the function. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. - Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them. Your output should always contain a section of reasoning and a command described in JSON format. -The command must always contain command_name and args fields. The command_name field should always be Bash.run, and the args field should always include a cmd field containing the bash command. + Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes. As shown in the example below: @@ -31,42 +29,75 @@ First I'll start by using ls to see what files are in the current directory. The }} ``` - You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. -You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above. +Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. + +You can use any bash commands you want (e.g., find, grep, cat, ls, cd) or any custom special tools (including `edit`) by calling Bash.run. Edit all the files you need. You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors. -However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. +However, the Bash.run does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. + +In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix. + +# INSTRUCTIONS: +Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. If the repository doesn't exist, please download it and then navigate to it. +All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time. +Your terminal session has started, and you can use any bash commands or the special interface to help you. Edit all the files you need. """ MINIMAL_EXAMPLE = """ ## Example of a actions trajectory User Requirement and Issue: Fix the bug in the repo. Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed. -### Locate issue(Require): Locate the issue in the code by searching for the relevant file, function, or class and open the file to view the code. -cd /workspace/django__django_3.0 +### Read and understand issue(Require): +{{ + "command_name": "Browser.goto", + "args": {{ + "url": "https://github.com/geekan/MetaGPT/issues/1275" + }} +}} -> -search_dir_and_preview ASCIIUsernameValidator --> -open /workspace/django__django_3.0/django/contrib/auth/validators.py --> -### Fix the Bug(Require): Fix the bug in the code by editing the relevant function, class or code snippet. -edit 10:20 < + +Bash.run(cmd='search_dir_and_preview ASCIIUsernameValidator') +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "open /workspace/django__django_3.0/django/contrib/auth/validators.py" + }} +}} +-> + +### Fix the Bug(Require): Fix the bug in the code by editing the relevant function, class or code snippet. +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "edit 10:20 < + ### Submit the Changes(Require): Submit the changes to the repository. -submit +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "submit" + }} +}} +Bash.run(cmd='submit') +-> +{{ + "command_name": "end", +}} """ @@ -132,6 +163,10 @@ IMPORTANT_TIPS = """ - Based on feedback of observation or bash command in trajectory to guide adjustments in your search strategy. 13. If the task results in succeed, fail, or NO PROGRESS, output `submit`. + +14. If provided an issue link, you MUST go to the issue page using Browser tool to understand the issue before starting your fix. + +15. When the edit fails, try to enlarge the starting line. """ NEXT_STEP_TEMPLATE = f""" diff --git a/metagpt/roles/di/swe.py b/metagpt/roles/di/swe.py index 6d357c02b..915d186b4 100644 --- a/metagpt/roles/di/swe.py +++ b/metagpt/roles/di/swe.py @@ -22,8 +22,7 @@ class SWE(RoleZero): _system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE system_msg: list[str] = [_system_msg.format(WINDOW=_bash_window_size)] _instruction: str = NEXT_STEP_TEMPLATE - # tools: list[str] = ["Bash", "Browser"] - tools: list[str] = ["Bash"] + tools: list[str] = ["Bash", "Browser:goto,scroll"] terminal: Bash = Field(default_factory=Bash, exclude=True) output_diff: str = "" max_react_loop: int = 30 @@ -75,11 +74,10 @@ class SWE(RoleZero): if not ok: return for cmd in commands: - if "submit" not in cmd.get("args", {}).get("cmd", ""): + if "end" != cmd.get("command_name", ""): return try: - # Generate patch by git diff - diff_output = self.terminal.run("git diff") + diff_output = self.terminal.run("git diff --cached") clear_diff = extract_patch(diff_output) logger.info(f"Diff output: \n{clear_diff}") if clear_diff: diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index c6ea71bd5..864996e8c 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -122,6 +122,8 @@ class Browser: async def goto(self, url: str, timeout: float = 30000): """Navigate to a specific URL.""" + if self.page is None: + await self.start() async with self.reporter as reporter: await reporter.async_report(url, "url") await self.page.goto(url, timeout=timeout) diff --git a/metagpt/tools/libs/terminal.py b/metagpt/tools/libs/terminal.py index 938eadff4..a04acb8e9 100644 --- a/metagpt/tools/libs/terminal.py +++ b/metagpt/tools/libs/terminal.py @@ -2,7 +2,7 @@ import subprocess import threading from queue import Queue -from metagpt.const import SWE_SETUP_PATH +from metagpt.const import SWE_SETUP_PATH, SWE_WORKSPACE_ROOT from metagpt.tools.tool_registry import register_tool from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter @@ -136,13 +136,14 @@ class Terminal: class Bash(Terminal): """ A class to run bash commands directly and provides custom shell functions. + All custom functions in this class can ONLY be called via the `Bash.run` method. """ def __init__(self): """init""" super().__init__() setup_cmd = f"source {SWE_SETUP_PATH}" - self.run_command(setup_cmd) + self.run_command(f"cd {SWE_WORKSPACE_ROOT} && {setup_cmd}") def run(self, cmd) -> str: """ @@ -184,7 +185,7 @@ class Bash(Terminal): filename (str): The name of the file to create. - submit - Submits your current code and terminates the session. + Submits your current code. it can only be executed once, the last action before the `end`. - search_dir_and_preview [] Searches for search_term in all files in dir and gives their code preview diff --git a/metagpt/tools/swe_agent_commands/defaults.sh b/metagpt/tools/swe_agent_commands/defaults.sh index f0898aabc..d416dcbf5 100644 --- a/metagpt/tools/swe_agent_commands/defaults.sh +++ b/metagpt/tools/swe_agent_commands/defaults.sh @@ -177,7 +177,7 @@ create() { # @yaml # signature: submit -# docstring: submits your current code and terminates the session. this is the only submit action needed; no need to run git add or git commit before this. +# docstring: submits your current code. the last action before the `end`, it can only be executed once. submit() { # Check if the patch file exists and is non-empty if [ -s "$SWE_CMD_WORK_DIR/test.patch" ]; then @@ -186,8 +186,7 @@ submit() { fi git add -A - git diff --cached > model.patch - echo "<>" + echo "<>" } diff --git a/metagpt/tools/swe_agent_commands/swe_agent_utils.py b/metagpt/tools/swe_agent_commands/swe_agent_utils.py index 8c01dc9c9..9e293f4d2 100644 --- a/metagpt/tools/swe_agent_commands/swe_agent_utils.py +++ b/metagpt/tools/swe_agent_commands/swe_agent_utils.py @@ -16,10 +16,12 @@ def extract_patch(command_output): def load_hf_dataset(dataset_name_or_path: str, cache_dir, split: str = "test", existing_ids: list = []): - if Path(dataset_name_or_path).exists(): - dataset = load_from_disk(dataset_name_or_path) + data_dir = cache_dir / dataset_name_or_path + if Path(data_dir).exists(): + dataset = load_from_disk(data_dir) else: - dataset = load_dataset(dataset_name_or_path, cache_dir=cache_dir) + dataset = load_dataset(dataset_name_or_path) + dataset.save_to_disk(data_dir) print(dataset) if split not in dataset: raise ValueError(f"Invalid split {split} for dataset {dataset_name_or_path}") diff --git a/tests/metagpt/roles/di/run_swe.py b/tests/metagpt/roles/di/run_swe.py index c6cc56fd1..f9d19be74 100644 --- a/tests/metagpt/roles/di/run_swe.py +++ b/tests/metagpt/roles/di/run_swe.py @@ -12,7 +12,7 @@ from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset # Specify by yourself TEST_REPO_DIR = Path("/Users/seeker/Projects/sdfz/mg/mg-swe-agent") / "benchmark" / "swe_bench" / "data" / "test_repo" -DATA_DIR = METAGPT_ROOT / "benchmark" / "swe_bench" / "data" +DATA_DIR = METAGPT_ROOT / "data/hugging_face" INSTANCE_TEMPLATE = """ ## User Requirement @@ -27,8 +27,9 @@ hints text is the comment under issue: {hints_text} The repository may already exist at the path `{repo_path}`. If it doesn't, please download the repository to this path. -All your subsequent actions should use the project path as your root directory, and you should never leave that directory to execute any actions. +Your first action must be to navigate to the repository path `{repo_path}`. This issue occurred in version {version}, with the corresponding base commit being {base_commit}. You need to switch to the code version associated with this commit. +All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time. # INSTRUCTIONS: Now, you're going to solve this issue on your own from the perspective of a programmer. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need. @@ -36,38 +37,6 @@ Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for f """ -def split_dataset_equally(dataset): - # 计算索引 - # fixme: 设置django - - part1 = dataset.filter( - lambda x: x["repo"] - not in [ - "django/django", - "sympy/sympy", - "pytest-dev/pytest", - ], - desc="Filtering out existing ids", - load_from_cache_file=True, - ) - - part2 = dataset.filter( - lambda x: x["repo"] in ["sympy/sympy", "pytest-dev/pytest"], - desc="Filtering out existing ids", - load_from_cache_file=True, - ) - - part3 = dataset.filter( - lambda x: x["repo"] in ["django/django"], - desc="Filtering out existing ids", - load_from_cache_file=False, - ) - - print(len(part1), len(part2), len(part3)) - - return [part1, part2, part3] - - def check_instance_status(instance, swe_result_dir): output_file = swe_result_dir / "all_preds.jsonl" res = True @@ -87,12 +56,20 @@ async def run(instance, swe_result_dir): logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.") return - repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]) - """ - All your subsequent actions should use the project path as your root directory, and you should never leave that directory to execute any actions. - """ + repo_path = Path("/Users/seeker/Projects/other/test_repo") / ( + instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"] + ) + # repo_path = Path("/Users/seeker/Projects/other/test_repo") / instance["repo"].split("/")[-1] + + # 前处理 terminal = Terminal() - terminal.run_command(f"cd {repo_path} && git checkout . && git clean -n -d && git clean -f -d") + terminal.run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d") + terminal.run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')") + logger.info(terminal.run_command("echo $BRANCH")) + # logger.info(terminal.run_command(f'Branch name: $BRANCH')) + logger.info(terminal.run_command('git checkout "$BRANCH"')) + logger.info(terminal.run_command("git branch")) + user_requirement_and_issue = INSTANCE_TEMPLATE.format( issue=instance["problem_statement"], hints_text=instance["hints_text"], @@ -126,16 +103,14 @@ async def async_main(): dataset_path = "manna-ai/SWE-bench_Nano" # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano" dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test") - sample_datasets = split_dataset_equally(dataset) date_time = datetime.now().strftime("%m-%d") - round_ = "third" - - for idx, sub_dataset in enumerate(sample_datasets): - exp_name = f"nano_mgx_{date_time}_{round_}_part_{idx}" - swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model}" / exp_name - swe_result_dir.mkdir(parents=True, exist_ok=True) - for instance in sub_dataset: - await run(instance, swe_result_dir) + # _round = "first" + _round = "second" + exp_name = f"nano_mgx_{date_time}_{_round}" + swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}" / exp_name + swe_result_dir.mkdir(parents=True, exist_ok=True) + for instance in dataset: + await run(instance, swe_result_dir) if __name__ == "__main__": From 00d683251b9f0df4613ad4a1a4ea9dcdec0191cd Mon Sep 17 00:00:00 2001 From: seeker Date: Wed, 3 Jul 2024 20:26:55 +0800 Subject: [PATCH 04/10] update: SWE Agent --- tests/metagpt/roles/di/run_swe_new.py | 38 +++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/metagpt/roles/di/run_swe_new.py diff --git a/tests/metagpt/roles/di/run_swe_new.py b/tests/metagpt/roles/di/run_swe_new.py new file mode 100644 index 000000000..94ef1e644 --- /dev/null +++ b/tests/metagpt/roles/di/run_swe_new.py @@ -0,0 +1,38 @@ +import asyncio + +from metagpt.logs import logger +from metagpt.roles.di.swe import SWE + +FIX_ISSUE1 = """ +Write a fix for this issue: https://github.com/langchain-ai/langchain/issues/20453, +you can fix it on this repo https://github.com/garylin2099/langchain, +""" + +"checkout a branch named test-fix, commit your changes, push, and create a PR to the master branch of https://github.com/iorisa/langchain" + +FIX_ISSUE2 = """ +Write a fix for this issue https://github.com/geekan/MetaGPT/issues/1275. +You can fix it on the v0.8-release branch of this repo https://github.com/garylin2099/MetaGPT, +during fixing, checkout a branch named test-fix-1275, commit your changes, push, and create a PR to the v0.8-release branch of https://github.com/garylin2099/MetaGPT +""" +FIX_ISSUE3 = """ +Write a fix for this issue https://github.com/geekan/MetaGPT/issues/1262. +You can fix it on this repo https://github.com/garylin2099/MetaGPT, +during fixing, checkout a branch named test-fix-1262, commit your changes, push, and create a PR to https://github.com/garylin2099/MetaGPT +""" +FIX_ISSUE_SIMPLE = """ +Write a fix for this issue: https://github.com/mannaandpoem/simple_calculator/issues/1, +you can fix it on this repo https://github.com/garylin2099/simple_calculator, +checkout a branch named test, commit your changes, push, and create a PR to the master branch of original repo. +""" + +if __name__ == "__main__": + swe_agent = SWE() + logger.info("**** Starting run ****") + user_requirement_and_issue = ( + FIX_ISSUE1 + + """Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed.""" + ) + asyncio.run(swe_agent.run(user_requirement_and_issue)) + logger.info("**** Finished running ****") + logger.info(f"Patch: {swe_agent.output_diff}") From 2e35f6666946a931746cfb7d845a77de8e488b67 Mon Sep 17 00:00:00 2001 From: your-username Date: Wed, 3 Jul 2024 22:37:17 +0800 Subject: [PATCH 05/10] update: SWE Agent --- metagpt/const.py | 2 ++ metagpt/roles/di/swe.py | 2 +- tests/metagpt/roles/di/run_swe_new.py | 11 +++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/metagpt/const.py b/metagpt/const.py index 94d22bc70..6a0e2c4bb 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -150,5 +150,7 @@ METAGPT_REPORTER_DEFAULT_URL = os.environ.get("METAGPT_REPORTER_URL", "") # Metadata defines AGENT = "agent" SWE_WORKSPACE_ROOT = Path("/tmp/swe_workspace") +if not SWE_WORKSPACE_ROOT.exists(): + SWE_WORKSPACE_ROOT.mkdir(parents=True) # SWE agent SWE_SETUP_PATH = METAGPT_ROOT / "metagpt/tools/swe_agent_commands/setup_default.sh" diff --git a/metagpt/roles/di/swe.py b/metagpt/roles/di/swe.py index 915d186b4..dde686e6a 100644 --- a/metagpt/roles/di/swe.py +++ b/metagpt/roles/di/swe.py @@ -25,7 +25,7 @@ class SWE(RoleZero): tools: list[str] = ["Bash", "Browser:goto,scroll"] terminal: Bash = Field(default_factory=Bash, exclude=True) output_diff: str = "" - max_react_loop: int = 30 + max_react_loop: int = 40 async def _think(self) -> bool: self._set_system_msg() diff --git a/tests/metagpt/roles/di/run_swe_new.py b/tests/metagpt/roles/di/run_swe_new.py index 94ef1e644..8ed1e9107 100644 --- a/tests/metagpt/roles/di/run_swe_new.py +++ b/tests/metagpt/roles/di/run_swe_new.py @@ -7,14 +7,14 @@ FIX_ISSUE1 = """ Write a fix for this issue: https://github.com/langchain-ai/langchain/issues/20453, you can fix it on this repo https://github.com/garylin2099/langchain, """ - -"checkout a branch named test-fix, commit your changes, push, and create a PR to the master branch of https://github.com/iorisa/langchain" +# + "checkout a branch named test-fix, commit your changes, push, and create a PR to the master branch of https://github.com/iorisa/langchain" FIX_ISSUE2 = """ Write a fix for this issue https://github.com/geekan/MetaGPT/issues/1275. You can fix it on the v0.8-release branch of this repo https://github.com/garylin2099/MetaGPT, -during fixing, checkout a branch named test-fix-1275, commit your changes, push, and create a PR to the v0.8-release branch of https://github.com/garylin2099/MetaGPT """ +# + "during fixing, checkout a branch named test-fix-1275, commit your changes, push, and create a PR to the v0.8-release branch of https://github.com/garylin2099/MetaGPT" + FIX_ISSUE3 = """ Write a fix for this issue https://github.com/geekan/MetaGPT/issues/1262. You can fix it on this repo https://github.com/garylin2099/MetaGPT, @@ -30,7 +30,10 @@ if __name__ == "__main__": swe_agent = SWE() logger.info("**** Starting run ****") user_requirement_and_issue = ( - FIX_ISSUE1 + # FIX_ISSUE1 + # FIX_ISSUE2 + # FIX_ISSUE3 + FIX_ISSUE_SIMPLE + """Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed.""" ) asyncio.run(swe_agent.run(user_requirement_and_issue)) From c717f2dc3ebc234078cdeae7f21e22f61eb44503 Mon Sep 17 00:00:00 2001 From: your-username Date: Wed, 3 Jul 2024 22:46:21 +0800 Subject: [PATCH 06/10] update: SWE Agent --- metagpt/tools/libs/terminal.py | 4 ++-- tests/metagpt/roles/di/run_swe.py | 15 +++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/metagpt/tools/libs/terminal.py b/metagpt/tools/libs/terminal.py index a04acb8e9..3270b0623 100644 --- a/metagpt/tools/libs/terminal.py +++ b/metagpt/tools/libs/terminal.py @@ -142,8 +142,8 @@ class Bash(Terminal): def __init__(self): """init""" super().__init__() - setup_cmd = f"source {SWE_SETUP_PATH}" - self.run_command(f"cd {SWE_WORKSPACE_ROOT} && {setup_cmd}") + self.run_command(f"cd {SWE_WORKSPACE_ROOT}") + self.run_command(f"source {SWE_SETUP_PATH}") def run(self, cmd) -> str: """ diff --git a/tests/metagpt/roles/di/run_swe.py b/tests/metagpt/roles/di/run_swe.py index f9d19be74..9965107db 100644 --- a/tests/metagpt/roles/di/run_swe.py +++ b/tests/metagpt/roles/di/run_swe.py @@ -1,7 +1,6 @@ import asyncio import json from datetime import datetime -from pathlib import Path from metagpt.config2 import config from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT @@ -11,7 +10,7 @@ from metagpt.tools.libs.terminal import Terminal from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset # Specify by yourself -TEST_REPO_DIR = Path("/Users/seeker/Projects/sdfz/mg/mg-swe-agent") / "benchmark" / "swe_bench" / "data" / "test_repo" +TEST_REPO_DIR = METAGPT_ROOT / "data" / "test_repo" DATA_DIR = METAGPT_ROOT / "data/hugging_face" INSTANCE_TEMPLATE = """ @@ -56,17 +55,13 @@ async def run(instance, swe_result_dir): logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.") return - repo_path = Path("/Users/seeker/Projects/other/test_repo") / ( - instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"] - ) - # repo_path = Path("/Users/seeker/Projects/other/test_repo") / instance["repo"].split("/")[-1] + repo_path = TEST_REPO_DIR / instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"] # 前处理 terminal = Terminal() terminal.run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d") terminal.run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')") logger.info(terminal.run_command("echo $BRANCH")) - # logger.info(terminal.run_command(f'Branch name: $BRANCH')) logger.info(terminal.run_command('git checkout "$BRANCH"')) logger.info(terminal.run_command("git branch")) @@ -103,9 +98,9 @@ async def async_main(): dataset_path = "manna-ai/SWE-bench_Nano" # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano" dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test") - date_time = datetime.now().strftime("%m-%d") - # _round = "first" - _round = "second" + date_time = datetime.now().strftime("%m%d") + _round = "first" + # _round = "second" exp_name = f"nano_mgx_{date_time}_{_round}" swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}" / exp_name swe_result_dir.mkdir(parents=True, exist_ok=True) From ad8204e15d4117369ee3c91b6460779cd231577b Mon Sep 17 00:00:00 2001 From: your-username Date: Thu, 4 Jul 2024 16:07:22 +0800 Subject: [PATCH 07/10] update: SWE Agent --- metagpt/roles/di/{swe.py => swe_agent.py} | 12 +++--- metagpt/tools/libs/terminal.py | 4 +- metagpt/tools/swe_agent_commands/__init__.py | 2 +- ...n_swe.py => run_sweagent_for_benchmark.py} | 7 ++-- ...w.py => run_sweagent_open_source_issue.py} | 41 ++++++++++--------- 5 files changed, 36 insertions(+), 30 deletions(-) rename metagpt/roles/di/{swe.py => swe_agent.py} (92%) rename tests/metagpt/roles/di/{run_swe.py => run_sweagent_for_benchmark.py} (96%) rename tests/metagpt/roles/di/{run_swe_new.py => run_sweagent_open_source_issue.py} (53%) diff --git a/metagpt/roles/di/swe.py b/metagpt/roles/di/swe_agent.py similarity index 92% rename from metagpt/roles/di/swe.py rename to metagpt/roles/di/swe_agent.py index dde686e6a..e86b50a9d 100644 --- a/metagpt/roles/di/swe.py +++ b/metagpt/roles/di/swe_agent.py @@ -14,9 +14,9 @@ from metagpt.tools.libs.terminal import Bash from metagpt.tools.swe_agent_commands.swe_agent_utils import extract_patch -class SWE(RoleZero): - name: str = "SweAgent" - profile: str = "Software Engineer" +class SWEAgent(RoleZero): + name: str = "Swen" + profile: str = "Issue Solver" goal: str = "Resolve GitHub issue" _bash_window_size: int = 100 _system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE @@ -26,12 +26,14 @@ class SWE(RoleZero): terminal: Bash = Field(default_factory=Bash, exclude=True) output_diff: str = "" max_react_loop: int = 40 + run_eval: bool = False async def _think(self) -> bool: self._set_system_msg() self._format_instruction() res = await super()._think() - await self._handle_action() + if self.run_eval: + await self._parse_commands_for_eval() return res def _set_system_msg(self): @@ -61,7 +63,7 @@ class SWE(RoleZero): return self.instruction - async def _handle_action(self): + async def _parse_commands_for_eval(self): """ Handles actions based on parsed commands. diff --git a/metagpt/tools/libs/terminal.py b/metagpt/tools/libs/terminal.py index 3270b0623..bcf039a5e 100644 --- a/metagpt/tools/libs/terminal.py +++ b/metagpt/tools/libs/terminal.py @@ -2,7 +2,7 @@ import subprocess import threading from queue import Queue -from metagpt.const import SWE_SETUP_PATH, SWE_WORKSPACE_ROOT +from metagpt.const import DEFAULT_WORKSPACE_ROOT, SWE_SETUP_PATH from metagpt.tools.tool_registry import register_tool from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter @@ -142,7 +142,7 @@ class Bash(Terminal): def __init__(self): """init""" super().__init__() - self.run_command(f"cd {SWE_WORKSPACE_ROOT}") + self.run_command(f"cd {DEFAULT_WORKSPACE_ROOT}") self.run_command(f"source {SWE_SETUP_PATH}") def run(self, cmd) -> str: diff --git a/metagpt/tools/swe_agent_commands/__init__.py b/metagpt/tools/swe_agent_commands/__init__.py index 42e92a12d..c0d3e2a60 100644 --- a/metagpt/tools/swe_agent_commands/__init__.py +++ b/metagpt/tools/swe_agent_commands/__init__.py @@ -1,5 +1,5 @@ """ -This tool is originally developed by the team behind the princeton-nlp/SWE-agent repository. +This folder is borrowed from princeton-nlp/SWE-agent You can find the original repository here: https://github.com/princeton-nlp/SWE-agent/tree/main/config/commands We are using a modified version from OpenDevin: diff --git a/tests/metagpt/roles/di/run_swe.py b/tests/metagpt/roles/di/run_sweagent_for_benchmark.py similarity index 96% rename from tests/metagpt/roles/di/run_swe.py rename to tests/metagpt/roles/di/run_sweagent_for_benchmark.py index 9965107db..cdd8df5d7 100644 --- a/tests/metagpt/roles/di/run_swe.py +++ b/tests/metagpt/roles/di/run_sweagent_for_benchmark.py @@ -5,7 +5,7 @@ from datetime import datetime from metagpt.config2 import config from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT from metagpt.logs import logger -from metagpt.roles.di.swe import SWE +from metagpt.roles.di.swe_agent import SWEAgent from metagpt.tools.libs.terminal import Terminal from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset @@ -74,13 +74,14 @@ async def run(instance, swe_result_dir): ) logger.info(f"**** Starting to run {instance['instance_id']}****") - swe_agent = SWE() + swe_agent = SWEAgent() + swe_agent.run_eval = True await swe_agent.run(user_requirement_and_issue) save_predictions(swe_agent, instance, swe_result_dir) logger.info(f"**** Finished running {instance['instance_id']}****") -def save_predictions(swe_agent: SWE, instance, swe_result_dir): +def save_predictions(swe_agent: SWEAgent, instance, swe_result_dir): output_file = swe_result_dir / "all_preds.jsonl" instance["model_name_or_path"] = swe_agent.config.llm.model instance["model_patch"] = swe_agent.output_diff diff --git a/tests/metagpt/roles/di/run_swe_new.py b/tests/metagpt/roles/di/run_sweagent_open_source_issue.py similarity index 53% rename from tests/metagpt/roles/di/run_swe_new.py rename to tests/metagpt/roles/di/run_sweagent_open_source_issue.py index 8ed1e9107..ec87dd7e2 100644 --- a/tests/metagpt/roles/di/run_swe_new.py +++ b/tests/metagpt/roles/di/run_sweagent_open_source_issue.py @@ -1,41 +1,44 @@ import asyncio from metagpt.logs import logger -from metagpt.roles.di.swe import SWE +from metagpt.roles.di.swe_agent import SWEAgent FIX_ISSUE1 = """ Write a fix for this issue: https://github.com/langchain-ai/langchain/issues/20453, -you can fix it on this repo https://github.com/garylin2099/langchain, +you can fix it on this repo https://github.com/garylin2099/langchain """ -# + "checkout a branch named test-fix, commit your changes, push, and create a PR to the master branch of https://github.com/iorisa/langchain" - +# + "checkout a branch named test-fix, commit your changes, push, +# and create a PR to the master branch of https://github.com/iorisa/langchain" +# """ FIX_ISSUE2 = """ Write a fix for this issue https://github.com/geekan/MetaGPT/issues/1275. -You can fix it on the v0.8-release branch of this repo https://github.com/garylin2099/MetaGPT, +You can fix it on the v0.8-release branch of this repo https://github.com/garylin2099/MetaGPT """ -# + "during fixing, checkout a branch named test-fix-1275, commit your changes, push, and create a PR to the v0.8-release branch of https://github.com/garylin2099/MetaGPT" +# + "during fixing, checkout a branch named test-fix-1275, commit your changes, push, +# and create a PR to the v0.8-release branch of https://github.com/garylin2099/MetaGPT" FIX_ISSUE3 = """ Write a fix for this issue https://github.com/geekan/MetaGPT/issues/1262. -You can fix it on this repo https://github.com/garylin2099/MetaGPT, -during fixing, checkout a branch named test-fix-1262, commit your changes, push, and create a PR to https://github.com/garylin2099/MetaGPT +You can fix it on this repo https://github.com/garylin2099/MetaGPT """ +# during fixing, checkout a branch named test-fix-1262, commit your changes, push, +# and create a PR to https://github.com/garylin2099/MetaGPT +# """ FIX_ISSUE_SIMPLE = """ Write a fix for this issue: https://github.com/mannaandpoem/simple_calculator/issues/1, -you can fix it on this repo https://github.com/garylin2099/simple_calculator, -checkout a branch named test, commit your changes, push, and create a PR to the master branch of original repo. +you can fix it on this repo https://github.com/garylin2099/simple_calculator """ +# checkout a branch named test, commit your changes, push, and create a PR to the master branch of original repo. +# """ + +NO_ENV_TIP = """ +Because the environment is not available, you DO NOT need to run and modify any existing test case files or +add new test case files to ensure that the bug is fixed. +""" if __name__ == "__main__": - swe_agent = SWE() + swe_agent = SWEAgent() logger.info("**** Starting run ****") - user_requirement_and_issue = ( - # FIX_ISSUE1 - # FIX_ISSUE2 - # FIX_ISSUE3 - FIX_ISSUE_SIMPLE - + """Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed.""" - ) + user_requirement_and_issue = FIX_ISSUE1 + NO_ENV_TIP asyncio.run(swe_agent.run(user_requirement_and_issue)) logger.info("**** Finished running ****") - logger.info(f"Patch: {swe_agent.output_diff}") From 9abea45d1e200b9ffc8519de5d7f94a1180ff156 Mon Sep 17 00:00:00 2001 From: your-username Date: Thu, 4 Jul 2024 16:12:49 +0800 Subject: [PATCH 08/10] update: SWE Agent --- metagpt/roles/di/swe_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/roles/di/swe_agent.py b/metagpt/roles/di/swe_agent.py index e86b50a9d..166de3918 100644 --- a/metagpt/roles/di/swe_agent.py +++ b/metagpt/roles/di/swe_agent.py @@ -29,14 +29,14 @@ class SWEAgent(RoleZero): run_eval: bool = False async def _think(self) -> bool: - self._set_system_msg() + self._update_system_msg() self._format_instruction() res = await super()._think() if self.run_eval: await self._parse_commands_for_eval() return res - def _set_system_msg(self): + def _update_system_msg(self): """ Sets the system message for the SWE agent. From bfb8a1d687e8b0ff576c558cbda2a014deac182f Mon Sep 17 00:00:00 2001 From: your-username Date: Thu, 4 Jul 2024 16:18:36 +0800 Subject: [PATCH 09/10] update: SWE Agent --- metagpt/prompts/di/{swe.py => swe_agent.py} | 0 ...n_sweagent_for_benchmark.py => run_swe_agent_for_benchmark.py} | 0 ...nt_open_source_issue.py => run_swe_agent_open_source_issue.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename metagpt/prompts/di/{swe.py => swe_agent.py} (100%) rename tests/metagpt/roles/di/{run_sweagent_for_benchmark.py => run_swe_agent_for_benchmark.py} (100%) rename tests/metagpt/roles/di/{run_sweagent_open_source_issue.py => run_swe_agent_open_source_issue.py} (100%) diff --git a/metagpt/prompts/di/swe.py b/metagpt/prompts/di/swe_agent.py similarity index 100% rename from metagpt/prompts/di/swe.py rename to metagpt/prompts/di/swe_agent.py diff --git a/tests/metagpt/roles/di/run_sweagent_for_benchmark.py b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py similarity index 100% rename from tests/metagpt/roles/di/run_sweagent_for_benchmark.py rename to tests/metagpt/roles/di/run_swe_agent_for_benchmark.py diff --git a/tests/metagpt/roles/di/run_sweagent_open_source_issue.py b/tests/metagpt/roles/di/run_swe_agent_open_source_issue.py similarity index 100% rename from tests/metagpt/roles/di/run_sweagent_open_source_issue.py rename to tests/metagpt/roles/di/run_swe_agent_open_source_issue.py From 223a79cd1d6477520fa710f1f72ced5fc887a830 Mon Sep 17 00:00:00 2001 From: your-username Date: Thu, 4 Jul 2024 16:25:13 +0800 Subject: [PATCH 10/10] update: SWE Agent --- metagpt/const.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metagpt/const.py b/metagpt/const.py index 6a0e2c4bb..c78a22641 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -149,8 +149,6 @@ METAGPT_REPORTER_DEFAULT_URL = os.environ.get("METAGPT_REPORTER_URL", "") # Metadata defines AGENT = "agent" -SWE_WORKSPACE_ROOT = Path("/tmp/swe_workspace") -if not SWE_WORKSPACE_ROOT.exists(): - SWE_WORKSPACE_ROOT.mkdir(parents=True) + # SWE agent SWE_SETUP_PATH = METAGPT_ROOT / "metagpt/tools/swe_agent_commands/setup_default.sh"