update: swe

2026-05-05 05:42:37 +02:00 · 2024-07-02 21:26:21 +08:00 · 2024-07-02 21:26:21 +08:00 · 92f94862cb
commit 92f94862cb
parent 96602292b7
7 changed files with 57 additions and 373 deletions
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@@ -4,7 +4,7 @@ import inspect
 import json
 import re
 import traceback
-from typing import Callable, Dict, List, Literal, Tuple, Union
+from typing import Callable, Dict, List, Literal, Tuple

 from pydantic import model_validator

@@ -167,7 +167,7 @@ class RoleZero(Role):
        if self.use_fixed_sop:
            return await super()._act()

-        commands, ok = await self._get_commands()
+        commands, ok = await self._parse_commands()
        if not ok:
            error_msg = commands
            return error_msg
@@ -202,7 +202,7 @@ class RoleZero(Role):
            actions_taken += 1
        return rsp  # return output from the last action

-    async def _get_commands(self) -> Tuple[Union[UserMessage, List[Dict]], bool]:
+    async def _parse_commands(self) -> Tuple[List[Dict], bool]:
        """Retrieves commands from the Large Language Model (LLM).

        This function attempts to retrieve a list of commands from the LLM by
@@ -211,7 +211,6 @@ class RoleZero(Role):

        Returns:
            A tuple containing:
-                - A `UserMessage` object or dict representing the commands.
                - A boolean flag indicating success (True) or failure (False).
        """
        try:
--- a/metagpt/roles/di/swe.py
+++ b/metagpt/roles/di/swe.py
@@ -22,6 +22,7 @@ class SWE(RoleZero):
    _system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE
    system_msg: list[str] = [_system_msg.format(WINDOW=_bash_window_size)]
    _instruction: str = NEXT_STEP_TEMPLATE
+    # tools: list[str] = ["Bash", "Browser"]
    tools: list[str] = ["Bash"]
    terminal: Bash = Field(default_factory=Bash, exclude=True)
    output_diff: str = ""
@@ -35,11 +36,23 @@ class SWE(RoleZero):
        return res

    def _set_system_msg(self):
+        """
+        Sets the system message for the SWE agent.
+
+        Sets the `_bash_window_size` from the environment variable `WINDOW` if it exists.
+        Formats the `_system_msg` template with the current `_bash_window_size`.
+        """
        if os.getenv("WINDOW"):
            self._bash_window_size = int(os.getenv("WINDOW"))
        self.system_msg = [self._system_msg.format(WINDOW=self._bash_window_size)]

    def _format_instruction(self):
+        """
+        Formats the instruction message for the SWE agent.
+
+        Runs the "state" command in the terminal, parses its output as JSON,
+        and uses it to format the `_instruction` template.
+        """
        state_output = self.terminal.run("state")
        bash_state = json.loads(state_output)

@@ -50,7 +63,15 @@ class SWE(RoleZero):
        return self.instruction

    async def _handle_action(self):
-        commands, ok = await self._get_commands()
+        """
+        Handles actions based on parsed commands.
+
+        Parses commands, checks for a "submit" action, and generates a patch using `git diff`.
+        Stores the cleaned patch in `output_diff`. Logs any exceptions.
+
+        This function is specifically added for SWE bench evaluation.
+        """
+        commands, ok = await self._parse_commands()
        if not ok:
            return
        for cmd in commands: