diff --git a/metagpt/const.py b/metagpt/const.py index c78a22641..94d22bc70 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -149,6 +149,6 @@ METAGPT_REPORTER_DEFAULT_URL = os.environ.get("METAGPT_REPORTER_URL", "") # Metadata defines AGENT = "agent" - +SWE_WORKSPACE_ROOT = Path("/tmp/swe_workspace") # SWE agent SWE_SETUP_PATH = METAGPT_ROOT / "metagpt/tools/swe_agent_commands/setup_default.sh" diff --git a/metagpt/prompts/di/swe.py b/metagpt/prompts/di/swe.py index 64c67b09b..ed1f8a011 100644 --- a/metagpt/prompts/di/swe.py +++ b/metagpt/prompts/di/swe.py @@ -4,19 +4,17 @@ You can find the original examples from the SWE-agent project here: https://github.com/princeton-nlp/SWE-agent/tree/main/config/configs """ - SWE_AGENT_SYSTEM_TEMPLATE = """ -SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface. +SETTING: You are an autonomous programmer, and you're working directly in the environment line with a special interface. The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time. Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. Pay attention to the original indentation when replacing the function. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. - Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them. Your output should always contain a section of reasoning and a command described in JSON format. -The command must always contain command_name and args fields. The command_name field should always be Bash.run, and the args field should always include a cmd field containing the bash command. 
+ Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes. As shown in the example below: @@ -31,42 +29,75 @@ First I'll start by using ls to see what files are in the current directory. The }} ``` - You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. -You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above. +Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. + +You can use any bash commands you want (e.g., find, grep, cat, ls, cd) or any custom special tools (including `edit`) by calling Bash.run. Edit all the files you need. You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors. -However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. +However, the Bash.run does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. + +In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix. + +# INSTRUCTIONS: +Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. 
If the repository doesn't exist, please download it and then navigate to it. +All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time. +Your terminal session has started, and you can use any bash commands or the special interface to help you. Edit all the files you need. """ MINIMAL_EXAMPLE = """ ## Example of a actions trajectory User Requirement and Issue: Fix the bug in the repo. Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed. -### Locate issue(Require): Locate the issue in the code by searching for the relevant file, function, or class and open the file to view the code. -cd /workspace/django__django_3.0 +### Read and understand issue(Require): +{{ + "command_name": "Browser.goto", + "args": {{ + "url": "https://github.com/geekan/MetaGPT/issues/1275" + }} +}} -> -search_dir_and_preview ASCIIUsernameValidator --> -open /workspace/django__django_3.0/django/contrib/auth/validators.py --> -### Fix the Bug(Require): Fix the bug in the code by editing the relevant function, class or code snippet. -edit 10:20 < + +Bash.run(cmd='search_dir_and_preview ASCIIUsernameValidator') +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "open /workspace/django__django_3.0/django/contrib/auth/validators.py" + }} +}} +-> + +### Fix the Bug(Require): Fix the bug in the code by editing the relevant function, class or code snippet. +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "edit 10:20 < + ### Submit the Changes(Require): Submit the changes to the repository. -submit +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "submit" + }} +}} +Bash.run(cmd='submit') +-> +{{ + "command_name": "end", +}} """ @@ -132,6 +163,10 @@ IMPORTANT_TIPS = """ - Based on feedback of observation or bash command in trajectory to guide adjustments in your search strategy. 13. 
If the task results in succeed, fail, or NO PROGRESS, output `submit`. + +14. If provided an issue link, you MUST go to the issue page using Browser tool to understand the issue before starting your fix. + +15. When the edit fails, try to enlarge the starting line. """ NEXT_STEP_TEMPLATE = f""" diff --git a/metagpt/roles/di/swe.py b/metagpt/roles/di/swe.py index 6d357c02b..915d186b4 100644 --- a/metagpt/roles/di/swe.py +++ b/metagpt/roles/di/swe.py @@ -22,8 +22,7 @@ class SWE(RoleZero): _system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE system_msg: list[str] = [_system_msg.format(WINDOW=_bash_window_size)] _instruction: str = NEXT_STEP_TEMPLATE - # tools: list[str] = ["Bash", "Browser"] - tools: list[str] = ["Bash"] + tools: list[str] = ["Bash", "Browser:goto,scroll"] terminal: Bash = Field(default_factory=Bash, exclude=True) output_diff: str = "" max_react_loop: int = 30 @@ -75,11 +74,10 @@ class SWE(RoleZero): if not ok: return for cmd in commands: - if "submit" not in cmd.get("args", {}).get("cmd", ""): + if "end" != cmd.get("command_name", ""): return try: - # Generate patch by git diff - diff_output = self.terminal.run("git diff") + diff_output = self.terminal.run("git diff --cached") clear_diff = extract_patch(diff_output) logger.info(f"Diff output: \n{clear_diff}") if clear_diff: diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index c6ea71bd5..864996e8c 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -122,6 +122,8 @@ class Browser: async def goto(self, url: str, timeout: float = 30000): """Navigate to a specific URL.""" + if self.page is None: + await self.start() async with self.reporter as reporter: await reporter.async_report(url, "url") await self.page.goto(url, timeout=timeout) diff --git a/metagpt/tools/libs/terminal.py b/metagpt/tools/libs/terminal.py index 938eadff4..a04acb8e9 100644 --- a/metagpt/tools/libs/terminal.py +++ b/metagpt/tools/libs/terminal.py @@ -2,7 +2,7 @@ import subprocess import 
threading from queue import Queue -from metagpt.const import SWE_SETUP_PATH +from metagpt.const import SWE_SETUP_PATH, SWE_WORKSPACE_ROOT from metagpt.tools.tool_registry import register_tool from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter @@ -136,13 +136,14 @@ class Terminal: class Bash(Terminal): """ A class to run bash commands directly and provides custom shell functions. + All custom functions in this class can ONLY be called via the `Bash.run` method. """ def __init__(self): """init""" super().__init__() setup_cmd = f"source {SWE_SETUP_PATH}" - self.run_command(setup_cmd) + self.run_command(f"cd {SWE_WORKSPACE_ROOT} && {setup_cmd}") def run(self, cmd) -> str: """ @@ -184,7 +185,7 @@ class Bash(Terminal): filename (str): The name of the file to create. - submit - Submits your current code and terminates the session. + Submits your current code. It can only be executed once, as the last action before `end`. - search_dir_and_preview [] Searches for search_term in all files in dir and gives their code preview diff --git a/metagpt/tools/swe_agent_commands/defaults.sh b/metagpt/tools/swe_agent_commands/defaults.sh index f0898aabc..d416dcbf5 100644 --- a/metagpt/tools/swe_agent_commands/defaults.sh +++ b/metagpt/tools/swe_agent_commands/defaults.sh @@ -177,7 +177,7 @@ create() { # @yaml # signature: submit -# docstring: submits your current code and terminates the session. this is the only submit action needed; no need to run git add or git commit before this. +# docstring: submits your current code. it can only be executed once, as the last action before `end`.
submit() { # Check if the patch file exists and is non-empty if [ -s "$SWE_CMD_WORK_DIR/test.patch" ]; then @@ -186,8 +186,7 @@ submit() { fi git add -A - git diff --cached > model.patch - echo "<>" + echo "<>" } diff --git a/metagpt/tools/swe_agent_commands/swe_agent_utils.py b/metagpt/tools/swe_agent_commands/swe_agent_utils.py index 8c01dc9c9..9e293f4d2 100644 --- a/metagpt/tools/swe_agent_commands/swe_agent_utils.py +++ b/metagpt/tools/swe_agent_commands/swe_agent_utils.py @@ -16,10 +16,12 @@ def extract_patch(command_output): def load_hf_dataset(dataset_name_or_path: str, cache_dir, split: str = "test", existing_ids: list = []): - if Path(dataset_name_or_path).exists(): - dataset = load_from_disk(dataset_name_or_path) + data_dir = cache_dir / dataset_name_or_path + if Path(data_dir).exists(): + dataset = load_from_disk(data_dir) else: - dataset = load_dataset(dataset_name_or_path, cache_dir=cache_dir) + dataset = load_dataset(dataset_name_or_path) + dataset.save_to_disk(data_dir) print(dataset) if split not in dataset: raise ValueError(f"Invalid split {split} for dataset {dataset_name_or_path}") diff --git a/tests/metagpt/roles/di/run_swe.py b/tests/metagpt/roles/di/run_swe.py index c6cc56fd1..f9d19be74 100644 --- a/tests/metagpt/roles/di/run_swe.py +++ b/tests/metagpt/roles/di/run_swe.py @@ -12,7 +12,7 @@ from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset # Specify by yourself TEST_REPO_DIR = Path("/Users/seeker/Projects/sdfz/mg/mg-swe-agent") / "benchmark" / "swe_bench" / "data" / "test_repo" -DATA_DIR = METAGPT_ROOT / "benchmark" / "swe_bench" / "data" +DATA_DIR = METAGPT_ROOT / "data/hugging_face" INSTANCE_TEMPLATE = """ ## User Requirement @@ -27,8 +27,9 @@ hints text is the comment under issue: {hints_text} The repository may already exist at the path `{repo_path}`. If it doesn't, please download the repository to this path. 
-All your subsequent actions should use the project path as your root directory, and you should never leave that directory to execute any actions. +Your first action must be to navigate to the repository path `{repo_path}`. This issue occurred in version {version}, with the corresponding base commit being {base_commit}. You need to switch to the code version associated with this commit. +All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time. # INSTRUCTIONS: Now, you're going to solve this issue on your own from the perspective of a programmer. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need. @@ -36,38 +37,6 @@ Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for f """ -def split_dataset_equally(dataset): - # 计算索引 - # fixme: 设置django - - part1 = dataset.filter( - lambda x: x["repo"] - not in [ - "django/django", - "sympy/sympy", - "pytest-dev/pytest", - ], - desc="Filtering out existing ids", - load_from_cache_file=True, - ) - - part2 = dataset.filter( - lambda x: x["repo"] in ["sympy/sympy", "pytest-dev/pytest"], - desc="Filtering out existing ids", - load_from_cache_file=True, - ) - - part3 = dataset.filter( - lambda x: x["repo"] in ["django/django"], - desc="Filtering out existing ids", - load_from_cache_file=False, - ) - - print(len(part1), len(part2), len(part3)) - - return [part1, part2, part3] - - def check_instance_status(instance, swe_result_dir): output_file = swe_result_dir / "all_preds.jsonl" res = True @@ -87,12 +56,20 @@ async def run(instance, swe_result_dir): logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.") return - repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]) - """ - All your subsequent actions should use the project 
path as your root directory, and you should never leave that directory to execute any actions. - """ + repo_path = Path("/Users/seeker/Projects/other/test_repo") / ( + instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"] + ) + # repo_path = Path("/Users/seeker/Projects/other/test_repo") / instance["repo"].split("/")[-1] + + # Preprocessing terminal = Terminal() - terminal.run_command(f"cd {repo_path} && git checkout . && git clean -n -d && git clean -f -d") + terminal.run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d") + terminal.run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')") + logger.info(terminal.run_command("echo $BRANCH")) + # logger.info(terminal.run_command(f'Branch name: $BRANCH')) + logger.info(terminal.run_command('git checkout "$BRANCH"')) + logger.info(terminal.run_command("git branch")) + user_requirement_and_issue = INSTANCE_TEMPLATE.format( issue=instance["problem_statement"], hints_text=instance["hints_text"], @@ -126,16 +103,14 @@ async def async_main(): dataset_path = "manna-ai/SWE-bench_Nano" # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano" dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test") - sample_datasets = split_dataset_equally(dataset) date_time = datetime.now().strftime("%m-%d") - round_ = "third" - - for idx, sub_dataset in enumerate(sample_datasets): - exp_name = f"nano_mgx_{date_time}_{round_}_part_{idx}" - swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model}" / exp_name - swe_result_dir.mkdir(parents=True, exist_ok=True) - for instance in sub_dataset: - await run(instance, swe_result_dir) + # _round = "first" + _round = "second" + exp_name = f"nano_mgx_{date_time}_{_round}" + swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}" / exp_name + swe_result_dir.mkdir(parents=True, exist_ok=True) + for instance in dataset: + await run(instance,
swe_result_dir) if __name__ == "__main__":