update: swe

This commit is contained in:
seeker 2024-07-02 21:26:21 +08:00
parent 96602292b7
commit 92f94862cb
7 changed files with 57 additions and 373 deletions

View file

@ -4,7 +4,7 @@ import inspect
import json
import re
import traceback
from typing import Callable, Dict, List, Literal, Tuple, Union
from typing import Callable, Dict, List, Literal, Tuple
from pydantic import model_validator
@ -167,7 +167,7 @@ class RoleZero(Role):
if self.use_fixed_sop:
return await super()._act()
commands, ok = await self._get_commands()
commands, ok = await self._parse_commands()
if not ok:
error_msg = commands
return error_msg
@ -202,7 +202,7 @@ class RoleZero(Role):
actions_taken += 1
return rsp # return output from the last action
async def _get_commands(self) -> Tuple[Union[UserMessage, List[Dict]], bool]:
async def _parse_commands(self) -> Tuple[List[Dict], bool]:
"""Retrieves commands from the Large Language Model (LLM).
This function attempts to retrieve a list of commands from the LLM by
@ -211,7 +211,6 @@ class RoleZero(Role):
Returns:
A tuple containing:
- A `UserMessage` object or dict representing the commands.
- A boolean flag indicating success (True) or failure (False).
"""
try:

View file

@ -22,6 +22,7 @@ class SWE(RoleZero):
_system_msg: str = SWE_AGENT_SYSTEM_TEMPLATE
system_msg: list[str] = [_system_msg.format(WINDOW=_bash_window_size)]
_instruction: str = NEXT_STEP_TEMPLATE
# tools: list[str] = ["Bash", "Browser"]
tools: list[str] = ["Bash"]
terminal: Bash = Field(default_factory=Bash, exclude=True)
output_diff: str = ""
@ -35,11 +36,23 @@ class SWE(RoleZero):
return res
def _set_system_msg(self):
    """
    Rebuild `system_msg`, honouring an optional `WINDOW` environment override.

    If the `WINDOW` environment variable holds a non-empty value, it is
    converted with `int` and stored in `_bash_window_size`. `system_msg` is
    then replaced by a one-element list containing the `_system_msg` template
    formatted with the current `_bash_window_size`.
    """
    window_override = os.getenv("WINDOW")
    if window_override:
        # int() raises ValueError for a non-numeric override, as before.
        self._bash_window_size = int(window_override)
    rendered_msg = self._system_msg.format(WINDOW=self._bash_window_size)
    self.system_msg = [rendered_msg]
def _format_instruction(self):
"""
Formats the instruction message for the SWE agent.
Runs the "state" command in the terminal, parses its output as JSON,
and uses it to format the `_instruction` template.
"""
state_output = self.terminal.run("state")
bash_state = json.loads(state_output)
@ -50,7 +63,15 @@ class SWE(RoleZero):
return self.instruction
async def _handle_action(self):
commands, ok = await self._get_commands()
"""
Handles actions based on parsed commands.
Parses commands, checks for a "submit" action, and generates a patch using `git diff`.
Stores the cleaned patch in `output_diff`. Logs any exceptions.
This function is specifically added for SWE bench evaluation.
"""
commands, ok = await self._parse_commands()
if not ok:
return
for cmd in commands: